[LLVM][SelectionDAG] Align poison/undef binop folds with IR. (#149334)
The "at construction" binop folds in SelectionDAG::getNode() have different behaviour when compared to the equivalent LLVM IR. This PR makes the behaviour consistent while also extending the coverage to include signed/unsigned max/min operations.
This commit is contained in:
parent
984ec02236
commit
13f38c97d5
@ -7843,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform trivial constant folding.
|
if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) {
|
||||||
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
|
switch (Opcode) {
|
||||||
return SV;
|
case ISD::XOR:
|
||||||
|
case ISD::ADD:
|
||||||
|
case ISD::PTRADD:
|
||||||
|
case ISD::SUB:
|
||||||
|
case ISD::SIGN_EXTEND_INREG:
|
||||||
|
case ISD::UDIV:
|
||||||
|
case ISD::SDIV:
|
||||||
|
case ISD::UREM:
|
||||||
|
case ISD::SREM:
|
||||||
|
case ISD::MUL:
|
||||||
|
case ISD::AND:
|
||||||
|
case ISD::SSUBSAT:
|
||||||
|
case ISD::USUBSAT:
|
||||||
|
case ISD::UMIN:
|
||||||
|
case ISD::OR:
|
||||||
|
case ISD::SADDSAT:
|
||||||
|
case ISD::UADDSAT:
|
||||||
|
case ISD::UMAX:
|
||||||
|
case ISD::SMAX:
|
||||||
|
case ISD::SMIN:
|
||||||
|
// fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison.
|
||||||
|
return N2.getOpcode() == ISD::POISON ? N2 : N1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Canonicalize an UNDEF to the RHS, even over a constant.
|
// Canonicalize an UNDEF to the RHS, even over a constant.
|
||||||
if (N1.isUndef()) {
|
if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) {
|
||||||
if (TLI->isCommutativeBinOp(Opcode)) {
|
if (TLI->isCommutativeBinOp(Opcode)) {
|
||||||
std::swap(N1, N2);
|
std::swap(N1, N2);
|
||||||
} else {
|
} else {
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
case ISD::PTRADD:
|
case ISD::PTRADD:
|
||||||
case ISD::SUB:
|
case ISD::SUB:
|
||||||
// fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison.
|
// fold op(undef, non_undef_arg2) -> undef.
|
||||||
return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
|
return N1;
|
||||||
case ISD::SIGN_EXTEND_INREG:
|
case ISD::SIGN_EXTEND_INREG:
|
||||||
case ISD::UDIV:
|
case ISD::UDIV:
|
||||||
case ISD::SDIV:
|
case ISD::SDIV:
|
||||||
@ -7864,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||||||
case ISD::SREM:
|
case ISD::SREM:
|
||||||
case ISD::SSUBSAT:
|
case ISD::SSUBSAT:
|
||||||
case ISD::USUBSAT:
|
case ISD::USUBSAT:
|
||||||
// fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison.
|
// fold op(undef, non_undef_arg2) -> 0.
|
||||||
return N1.getOpcode() == ISD::POISON ? getPOISON(VT)
|
return getConstant(0, DL, VT);
|
||||||
: getConstant(0, DL, VT);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fold a bunch of operators when the RHS is undef.
|
// Fold a bunch of operators when the RHS is undef.
|
||||||
if (N2.isUndef()) {
|
if (N2.getOpcode() == ISD::UNDEF) {
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
case ISD::XOR:
|
case ISD::XOR:
|
||||||
if (N1.isUndef())
|
if (N1.getOpcode() == ISD::UNDEF)
|
||||||
// Handle undef ^ undef -> 0 special case. This is a common
|
// Handle undef ^ undef -> 0 special case. This is a common
|
||||||
// idiom (misuse).
|
// idiom (misuse).
|
||||||
return getConstant(0, DL, VT);
|
return getConstant(0, DL, VT);
|
||||||
@ -7883,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||||||
case ISD::ADD:
|
case ISD::ADD:
|
||||||
case ISD::PTRADD:
|
case ISD::PTRADD:
|
||||||
case ISD::SUB:
|
case ISD::SUB:
|
||||||
|
// fold op(arg1, undef) -> undef.
|
||||||
|
return N2;
|
||||||
case ISD::UDIV:
|
case ISD::UDIV:
|
||||||
case ISD::SDIV:
|
case ISD::SDIV:
|
||||||
case ISD::UREM:
|
case ISD::UREM:
|
||||||
case ISD::SREM:
|
case ISD::SREM:
|
||||||
// fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison.
|
// fold op(arg1, undef) -> poison.
|
||||||
return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
|
return getPOISON(VT);
|
||||||
case ISD::MUL:
|
case ISD::MUL:
|
||||||
case ISD::AND:
|
case ISD::AND:
|
||||||
case ISD::SSUBSAT:
|
case ISD::SSUBSAT:
|
||||||
case ISD::USUBSAT:
|
case ISD::USUBSAT:
|
||||||
// fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison.
|
case ISD::UMIN:
|
||||||
return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
|
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0.
|
||||||
: getConstant(0, DL, VT);
|
return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT);
|
||||||
case ISD::OR:
|
case ISD::OR:
|
||||||
case ISD::SADDSAT:
|
case ISD::SADDSAT:
|
||||||
case ISD::UADDSAT:
|
case ISD::UADDSAT:
|
||||||
// fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) ->
|
case ISD::UMAX:
|
||||||
// poison.
|
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1.
|
||||||
return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
|
return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT);
|
||||||
: getAllOnesConstant(DL, VT);
|
case ISD::SMAX:
|
||||||
|
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT.
|
||||||
|
return N1.getOpcode() == ISD::UNDEF
|
||||||
|
? N2
|
||||||
|
: getConstant(
|
||||||
|
APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL,
|
||||||
|
VT);
|
||||||
|
case ISD::SMIN:
|
||||||
|
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT.
|
||||||
|
return N1.getOpcode() == ISD::UNDEF
|
||||||
|
? N2
|
||||||
|
: getConstant(
|
||||||
|
APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL,
|
||||||
|
VT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Perform trivial constant folding.
|
||||||
|
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
|
||||||
|
return SV;
|
||||||
|
|
||||||
// Memoize this node if possible.
|
// Memoize this node if possible.
|
||||||
SDNode *N;
|
SDNode *N;
|
||||||
SDVTList VTs = getVTList(VT);
|
SDVTList VTs = getVTList(VT);
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
define i32 @f(i32 %a0) {
|
define i32 @f(i32 %a0) {
|
||||||
; CHECK-LABEL: f:
|
; CHECK-LABEL: f:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov w0, wzr
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%1 = lshr i32 %a0, 2147483647
|
%1 = lshr i32 %a0, 2147483647
|
||||||
%2 = add i32 %1, 2147483647
|
%2 = add i32 %1, 2147483647
|
||||||
|
@ -235,7 +235,7 @@ define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
|
|||||||
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||||
; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4
|
; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4
|
||||||
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3
|
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
|
||||||
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
|
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
@ -202,10 +202,9 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
|
|||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
||||||
; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0
|
; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0
|
||||||
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||||
; GFX6-NEXT: v_min_u32_e32 v3, 0xffff, v2
|
; GFX6-NEXT: v_min_u32_e32 v2, 0xffff, v2
|
||||||
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3
|
; GFX6-NEXT: v_alignbit_b32 v1, v2, v1, 16
|
||||||
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
|
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-LABEL: v_uaddsat_v3i16:
|
; GFX8-LABEL: v_uaddsat_v3i16:
|
||||||
|
@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v8i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v5
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v3
|
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v3
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2
|
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v4
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v8i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v8i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
||||||
@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
||||||
@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1165,21 +1165,21 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v2, v2, v10, v6
|
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v12
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v16i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v7, v7, v15
|
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v5, v5, v13
|
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9
|
; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v5, v6, v7
|
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_max_i16 v7, v7, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v11
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v5
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v3, v4, v3
|
; GFX10-SDAG-NEXT: v_max_i16 v3, v6, v3
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v8
|
; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v8
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v2, v2, v10, v5
|
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v9
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v3, v2
|
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v3
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v16i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v16i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
||||||
@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
|
||||||
@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
|
||||||
@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v8i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v5
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v3
|
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v3
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2
|
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v4
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v8i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v8i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
||||||
@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
||||||
@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1165,21 +1165,21 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v2, v2, v10, v6
|
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v12
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v16i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v7, v7, v15
|
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v5, v5, v13
|
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9
|
; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v5, v6, v7
|
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_min_i16 v7, v7, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v11
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v5
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v3, v4, v3
|
; GFX10-SDAG-NEXT: v_min_i16 v3, v6, v3
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v8
|
; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v8
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v2, v2, v10, v5
|
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v9
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v3, v2
|
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v3
|
||||||
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v16i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v16i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
||||||
@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
|
||||||
@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
|
||||||
@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
|
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
|
||||||
|
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
@ -320,7 +320,7 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v4i8:
|
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v4i8:
|
||||||
@ -351,8 +351,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
|
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -387,9 +388,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v3
|
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v3
|
||||||
; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 8
|
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
|
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -429,8 +430,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -446,8 +447,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -500,8 +501,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -521,8 +522,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -572,18 +573,18 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -628,7 +629,7 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
|
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
|
||||||
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
@ -660,17 +661,17 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_e32 v0, v0, v1
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
@ -702,21 +703,21 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v6
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v5
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4
|
||||||
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v6
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v3, v2
|
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v3
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v4
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -756,50 +757,49 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -852,27 +852,26 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
|
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
|
||||||
@ -882,24 +881,24 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -957,32 +956,32 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1051,9 +1050,8 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
|
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||||
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX8-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
||||||
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
@ -1093,25 +1091,24 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6
|
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12
|
||||||
|
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
|
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
@ -1151,38 +1148,38 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v5, v5, v13
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v6, v6, v14
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v4, v4, v12
|
; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v11
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v8
|
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v8
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6
|
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_max_u16 v5, v6, v14
|
||||||
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2
|
; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v10
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v5
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_max_u16 v0, v2, v0
|
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX10-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX10-GISEL-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
@ -1237,84 +1234,82 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v3.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v12
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1382,44 +1377,42 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v3.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
|
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
|
||||||
@ -1429,41 +1422,41 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v12
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1940,18 +1933,18 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -2136,32 +2129,32 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
@ -485,18 +485,18 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -549,15 +549,15 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_e32 v0, v0, v1
|
; GFX9-SDAG-NEXT: v_min_u16_e32 v0, v0, v1
|
||||||
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -578,20 +578,20 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v8i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v7
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v6
|
; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v5
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4
|
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v4
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v6
|
||||||
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v3, v2
|
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v3
|
||||||
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
@ -620,24 +620,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v8i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v8i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.h, v1.h
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
||||||
@ -646,23 +646,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v8i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v8i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
|
||||||
@ -699,24 +699,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.h, v1.h
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
||||||
@ -729,23 +729,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
|
||||||
@ -787,32 +787,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -899,20 +899,20 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
||||||
; GFX9-SDAG: ; %bb.0: ; %entry
|
; GFX9-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7
|
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6
|
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
|
||||||
|
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12
|
||||||
|
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
|
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6
|
||||||
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
||||||
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
||||||
@ -944,32 +944,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v16i8:
|
||||||
; GFX10-SDAG: ; %bb.0: ; %entry
|
; GFX10-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v5, v5, v13
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v6, v6, v14
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7
|
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v4, v4, v12
|
; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15
|
||||||
|
; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v11
|
||||||
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v8
|
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v8
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6
|
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX10-SDAG-NEXT: v_min_u16 v5, v6, v14
|
||||||
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v10
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
|
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v5
|
||||||
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8
|
||||||
@ -1018,34 +1018,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v16i8:
|
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v16i8:
|
||||||
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
|
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l
|
||||||
|
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h
|
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
||||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
||||||
@ -1061,34 +1061,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v16i8:
|
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v16i8:
|
||||||
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7
|
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v5, v6, v14
|
||||||
|
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
@ -1147,34 +1147,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
|
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l
|
||||||
|
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h
|
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
|
||||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
|
||||||
@ -1194,34 +1194,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
|
|||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7
|
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v6, v14
|
||||||
|
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2
|
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
|
||||||
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
|
||||||
@ -1685,18 +1685,18 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
@ -1878,32 +1878,32 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
|
|||||||
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16:
|
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16:
|
||||||
; GFX7-SDAG: ; %bb.0: ; %entry
|
; GFX7-SDAG: ; %bb.0: ; %entry
|
||||||
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
||||||
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
|
||||||
|
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10
|
||||||
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
|
||||||
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
|
||||||
|
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
|
||||||
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
|
||||||
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
|
|||||||
define <8 x i16> @combine_constfold_undef_v8i16() {
|
define <8 x i16> @combine_constfold_undef_v8i16() {
|
||||||
; SSE-LABEL: combine_constfold_undef_v8i16:
|
; SSE-LABEL: combine_constfold_undef_v8i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_constfold_undef_v8i16:
|
; AVX-LABEL: combine_constfold_undef_v8i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
|
%res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
|
@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() {
|
|||||||
define <8 x i16> @combine_constfold_undef_v8i16() {
|
define <8 x i16> @combine_constfold_undef_v8i16() {
|
||||||
; SSE-LABEL: combine_constfold_undef_v8i16:
|
; SSE-LABEL: combine_constfold_undef_v8i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535]
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_constfold_undef_v8i16:
|
; AVX-LABEL: combine_constfold_undef_v8i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
|
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535]
|
||||||
|
; AVX-NEXT: # xmm0 = mem[0,0]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>)
|
%res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
|
@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
|
|||||||
define <8 x i16> @combine_constfold_undef_v8i16() {
|
define <8 x i16> @combine_constfold_undef_v8i16() {
|
||||||
; SSE-LABEL: combine_constfold_undef_v8i16:
|
; SSE-LABEL: combine_constfold_undef_v8i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_constfold_undef_v8i16:
|
; AVX-LABEL: combine_constfold_undef_v8i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
|
%res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
|
@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() {
|
|||||||
define <8 x i16> @combine_constfold_undef_v8i16() {
|
define <8 x i16> @combine_constfold_undef_v8i16() {
|
||||||
; SSE-LABEL: combine_constfold_undef_v8i16:
|
; SSE-LABEL: combine_constfold_undef_v8i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: combine_constfold_undef_v8i16:
|
; AVX1-LABEL: combine_constfold_undef_v8i16:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: combine_constfold_undef_v8i16:
|
; AVX2-LABEL: combine_constfold_undef_v8i16:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
|
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: combine_constfold_undef_v8i16:
|
; AVX512-LABEL: combine_constfold_undef_v8i16:
|
||||||
|
@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
|
|||||||
; CHECK-LABEL: shift_i32_by_32:
|
; CHECK-LABEL: shift_i32_by_32:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movl $-1, 4(%eax)
|
; CHECK-NEXT: movl $0, 4(%eax)
|
||||||
; CHECK-NEXT: movl $-1, (%eax)
|
; CHECK-NEXT: movl $0, (%eax)
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
;
|
||||||
; CHECK64-LABEL: shift_i32_by_32:
|
; CHECK64-LABEL: shift_i32_by_32:
|
||||||
; CHECK64: # %bb.0: # %entry
|
; CHECK64: # %bb.0: # %entry
|
||||||
; CHECK64-NEXT: movq $-1, (%rdx)
|
; CHECK64-NEXT: movq $0, (%rdx)
|
||||||
; CHECK64-NEXT: retq
|
; CHECK64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%load1 = load i8, ptr %src1, align 1
|
%load1 = load i8, ptr %src1, align 1
|
||||||
|
@ -7,12 +7,10 @@
|
|||||||
define void @PR33960() {
|
define void @PR33960() {
|
||||||
; X86-LABEL: PR33960:
|
; X86-LABEL: PR33960:
|
||||||
; X86: # %bb.0: # %entry
|
; X86: # %bb.0: # %entry
|
||||||
; X86-NEXT: movl $-1, b
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: PR33960:
|
; X64-LABEL: PR33960:
|
||||||
; X64: # %bb.0: # %entry
|
; X64: # %bb.0: # %entry
|
||||||
; X64-NEXT: movl $-1, b(%rip)
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3
|
%tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3
|
||||||
|
@ -42,6 +42,7 @@ add_llvm_unittest(CodeGenTests
|
|||||||
ScalableVectorMVTsTest.cpp
|
ScalableVectorMVTsTest.cpp
|
||||||
SchedBoundary.cpp
|
SchedBoundary.cpp
|
||||||
SelectionDAGAddressAnalysisTest.cpp
|
SelectionDAGAddressAnalysisTest.cpp
|
||||||
|
SelectionDAGNodeConstructionTest.cpp
|
||||||
SelectionDAGPatternMatchTest.cpp
|
SelectionDAGPatternMatchTest.cpp
|
||||||
TypeTraitsTest.cpp
|
TypeTraitsTest.cpp
|
||||||
TargetOptionsTest.cpp
|
TargetOptionsTest.cpp
|
||||||
|
@ -7,103 +7,12 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
|
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
|
||||||
|
#include "SelectionDAGTestBase.h"
|
||||||
#include "llvm/Analysis/MemoryLocation.h"
|
#include "llvm/Analysis/MemoryLocation.h"
|
||||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
||||||
#include "llvm/AsmParser/Parser.h"
|
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
#include "llvm/CodeGen/TargetLowering.h"
|
|
||||||
#include "llvm/IR/Module.h"
|
|
||||||
#include "llvm/MC/TargetRegistry.h"
|
|
||||||
#include "llvm/Support/SourceMgr.h"
|
|
||||||
#include "llvm/Support/TargetSelect.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
class SelectionDAGAddressAnalysisTest : public testing::Test {
|
class SelectionDAGAddressAnalysisTest : public SelectionDAGTestBase {};
|
||||||
protected:
|
|
||||||
static void SetUpTestCase() {
|
|
||||||
InitializeAllTargets();
|
|
||||||
InitializeAllTargetMCs();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetUp() override {
|
|
||||||
StringRef Assembly = "@g = global i32 0\n"
|
|
||||||
"@g_alias = alias i32, i32* @g\n"
|
|
||||||
"define i32 @f() {\n"
|
|
||||||
" %1 = load i32, i32* @g\n"
|
|
||||||
" ret i32 %1\n"
|
|
||||||
"}";
|
|
||||||
|
|
||||||
Triple TargetTriple("aarch64--");
|
|
||||||
std::string Error;
|
|
||||||
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
|
|
||||||
// FIXME: These tests do not depend on AArch64 specifically, but we have to
|
|
||||||
// initialize a target. A skeleton Target for unittests would allow us to
|
|
||||||
// always run these tests.
|
|
||||||
if (!T)
|
|
||||||
GTEST_SKIP();
|
|
||||||
|
|
||||||
TargetOptions Options;
|
|
||||||
TM = std::unique_ptr<TargetMachine>(
|
|
||||||
T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt,
|
|
||||||
std::nullopt, CodeGenOptLevel::Aggressive));
|
|
||||||
if (!TM)
|
|
||||||
GTEST_SKIP();
|
|
||||||
|
|
||||||
SMDiagnostic SMError;
|
|
||||||
M = parseAssemblyString(Assembly, SMError, Context);
|
|
||||||
if (!M)
|
|
||||||
report_fatal_error(SMError.getMessage());
|
|
||||||
M->setDataLayout(TM->createDataLayout());
|
|
||||||
|
|
||||||
F = M->getFunction("f");
|
|
||||||
if (!F)
|
|
||||||
report_fatal_error("F?");
|
|
||||||
G = M->getGlobalVariable("g");
|
|
||||||
if (!G)
|
|
||||||
report_fatal_error("G?");
|
|
||||||
AliasedG = M->getNamedAlias("g_alias");
|
|
||||||
if (!AliasedG)
|
|
||||||
report_fatal_error("AliasedG?");
|
|
||||||
|
|
||||||
MachineModuleInfo MMI(TM.get());
|
|
||||||
|
|
||||||
MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
|
|
||||||
MMI.getContext(), 0);
|
|
||||||
|
|
||||||
DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);
|
|
||||||
if (!DAG)
|
|
||||||
report_fatal_error("DAG?");
|
|
||||||
OptimizationRemarkEmitter ORE(F);
|
|
||||||
FunctionAnalysisManager FAM;
|
|
||||||
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
|
|
||||||
|
|
||||||
TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
|
|
||||||
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
|
|
||||||
nullptr, TTI.hasBranchDivergence(F));
|
|
||||||
}
|
|
||||||
|
|
||||||
TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
|
|
||||||
return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
EVT getTypeToTransformTo(EVT VT) {
|
|
||||||
return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMContext Context;
|
|
||||||
std::unique_ptr<TargetMachine> TM;
|
|
||||||
std::unique_ptr<Module> M;
|
|
||||||
Function *F;
|
|
||||||
GlobalVariable *G;
|
|
||||||
GlobalAlias *AliasedG;
|
|
||||||
std::unique_ptr<MachineFunction> MF;
|
|
||||||
std::unique_ptr<SelectionDAG> DAG;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F(SelectionDAGAddressAnalysisTest, sameFrameObject) {
|
TEST_F(SelectionDAGAddressAnalysisTest, sameFrameObject) {
|
||||||
SDLoc Loc;
|
SDLoc Loc;
|
||||||
|
317
llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp
Normal file
317
llvm/unittests/CodeGen/SelectionDAGNodeConstructionTest.cpp
Normal file
@ -0,0 +1,317 @@
|
|||||||
|
//===---- llvm/unittest/CodeGen/SelectionDAGNodeConstructionTest.cpp ------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "SelectionDAGTestBase.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Fixture for the tests below, which check what SelectionDAG::getNode()
// returns for binary operations with poison/undef operands. The class body is
// empty; everything it provides is inherited from SelectionDAGTestBase.
class SelectionDAGNodeConstructionTest : public SelectionDAGTestBase {};
|
||||||
|
|
||||||
|
// Checks the result of building ISD::ADD with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, ADD) {
  SDLoc Loc;
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::ADD, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // Without poison, an undef operand is expected to give undef.
  EXPECT_EQ(Make(Var, UndefV), UndefV);
  EXPECT_EQ(Make(UndefV, Var), UndefV);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::AND with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, AND) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::AND, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give zero; two give undef.
  EXPECT_EQ(Make(Var, UndefV), ZeroC);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::MUL with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, MUL) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::MUL, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give zero; two give undef.
  EXPECT_EQ(Make(Var, UndefV), ZeroC);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::OR with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, OR) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue OnesC = DAG->getAllOnesConstant(Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::OR, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give all-ones; two give undef.
  EXPECT_EQ(Make(Var, UndefV), OnesC);
  EXPECT_EQ(Make(UndefV, Var), OnesC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SADDSAT with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue OnesC = DAG->getAllOnesConstant(Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SADDSAT, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give all-ones; two give undef.
  EXPECT_EQ(Make(Var, UndefV), OnesC);
  EXPECT_EQ(Make(UndefV, Var), OnesC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SDIV with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SDIV) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from a (dividend, divisor) pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SDIV, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // An undef divisor is expected to give poison; an undef dividend alone
  // gives zero.
  EXPECT_EQ(Make(Var, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), PoisonV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SMAX with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SMAX) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue SMaxC =
      DAG->getConstant(APInt::getSignedMaxValue(32), Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SMAX, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give the signed maximum; two give undef.
  EXPECT_EQ(Make(Var, UndefV), SMaxC);
  EXPECT_EQ(Make(UndefV, Var), SMaxC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SMIN with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SMIN) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue SMinC =
      DAG->getConstant(APInt::getSignedMinValue(32), Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SMIN, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give the signed minimum; two give undef.
  EXPECT_EQ(Make(Var, UndefV), SMinC);
  EXPECT_EQ(Make(UndefV, Var), SMinC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SREM with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SREM) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from a (dividend, divisor) pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SREM, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // An undef divisor is expected to give poison; an undef dividend alone
  // gives zero.
  EXPECT_EQ(Make(Var, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), PoisonV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SSUBSAT with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SSUBSAT, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give zero; two give undef.
  EXPECT_EQ(Make(Var, UndefV), ZeroC);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::SUB with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, SUB) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SUB, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // Without poison, an undef operand is expected to give undef.
  EXPECT_EQ(Make(Var, UndefV), UndefV);
  EXPECT_EQ(Make(UndefV, Var), UndefV);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::UADDSAT with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, UADDSAT) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue OnesC = DAG->getAllOnesConstant(Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UADDSAT, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give all-ones; two give undef.
  EXPECT_EQ(Make(Var, UndefV), OnesC);
  EXPECT_EQ(Make(UndefV, Var), OnesC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::UDIV with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, UDIV) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from a (dividend, divisor) pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UDIV, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // An undef divisor is expected to give poison; an undef dividend alone
  // gives zero.
  EXPECT_EQ(Make(Var, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), PoisonV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::UMAX with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, UMAX) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue OnesC = DAG->getAllOnesConstant(Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UMAX, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give all-ones; two give undef.
  EXPECT_EQ(Make(Var, UndefV), OnesC);
  EXPECT_EQ(Make(UndefV, Var), OnesC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::UMIN with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, UMIN) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UMIN, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give zero; two give undef.
  EXPECT_EQ(Make(Var, UndefV), ZeroC);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::UREM with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, UREM) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from a (dividend, divisor) pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UREM, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // An undef divisor is expected to give poison; an undef dividend alone
  // gives zero.
  EXPECT_EQ(Make(Var, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), PoisonV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::USUBSAT with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::USUBSAT, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give zero; two give undef.
  EXPECT_EQ(Make(Var, UndefV), ZeroC);
  EXPECT_EQ(Make(UndefV, Var), ZeroC);
  EXPECT_EQ(Make(UndefV, UndefV), UndefV);
}
|
||||||
|
|
||||||
|
// Checks the result of building ISD::XOR with poison/undef operands.
TEST_F(SelectionDAGNodeConstructionTest, XOR) {
  SDLoc Loc;
  SDValue Var = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i32);
  SDValue PoisonV = DAG->getPOISON(MVT::i32);
  SDValue UndefV = DAG->getUNDEF(MVT::i32);
  SDValue ZeroC = DAG->getConstant(0, Loc, MVT::i32);

  // Builds the node under test from an operand pair.
  auto Make = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::XOR, Loc, MVT::i32, LHS, RHS);
  };

  // A poison operand on either side is expected to give poison.
  EXPECT_EQ(Make(Var, PoisonV), PoisonV);
  EXPECT_EQ(Make(PoisonV, Var), PoisonV);
  EXPECT_EQ(Make(PoisonV, UndefV), PoisonV);
  EXPECT_EQ(Make(UndefV, PoisonV), PoisonV);

  // One undef operand is expected to give undef; two give zero.
  EXPECT_EQ(Make(Var, UndefV), UndefV);
  EXPECT_EQ(Make(UndefV, Var), UndefV);
  EXPECT_EQ(Make(UndefV, UndefV), ZeroC);
}
|
@ -6,102 +6,12 @@
|
|||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
#include "SelectionDAGTestBase.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
||||||
#include "llvm/AsmParser/Parser.h"
|
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
|
||||||
#include "llvm/CodeGen/SDPatternMatch.h"
|
#include "llvm/CodeGen/SDPatternMatch.h"
|
||||||
#include "llvm/CodeGen/TargetLowering.h"
|
|
||||||
#include "llvm/IR/Module.h"
|
|
||||||
#include "llvm/MC/TargetRegistry.h"
|
|
||||||
#include "llvm/Support/SourceMgr.h"
|
|
||||||
#include "llvm/Support/TargetSelect.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
class SelectionDAGPatternMatchTest : public testing::Test {
|
class SelectionDAGPatternMatchTest : public SelectionDAGTestBase {};
|
||||||
protected:
|
|
||||||
static void SetUpTestCase() {
|
|
||||||
InitializeAllTargets();
|
|
||||||
InitializeAllTargetMCs();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetUp() override {
|
|
||||||
StringRef Assembly = "@g = global i32 0\n"
|
|
||||||
"@g_alias = alias i32, i32* @g\n"
|
|
||||||
"define i32 @f() {\n"
|
|
||||||
" %1 = load i32, i32* @g\n"
|
|
||||||
" ret i32 %1\n"
|
|
||||||
"}";
|
|
||||||
|
|
||||||
Triple TargetTriple("riscv64--");
|
|
||||||
std::string Error;
|
|
||||||
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
|
|
||||||
// FIXME: These tests do not depend on RISCV specifically, but we have to
|
|
||||||
// initialize a target. A skeleton Target for unittests would allow us to
|
|
||||||
// always run these tests.
|
|
||||||
if (!T)
|
|
||||||
GTEST_SKIP();
|
|
||||||
|
|
||||||
TargetOptions Options;
|
|
||||||
TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
|
|
||||||
TargetTriple, "", "+m,+f,+d,+v", Options, std::nullopt, std::nullopt,
|
|
||||||
CodeGenOptLevel::Aggressive));
|
|
||||||
if (!TM)
|
|
||||||
GTEST_SKIP();
|
|
||||||
|
|
||||||
SMDiagnostic SMError;
|
|
||||||
M = parseAssemblyString(Assembly, SMError, Context);
|
|
||||||
if (!M)
|
|
||||||
report_fatal_error(SMError.getMessage());
|
|
||||||
M->setDataLayout(TM->createDataLayout());
|
|
||||||
|
|
||||||
F = M->getFunction("f");
|
|
||||||
if (!F)
|
|
||||||
report_fatal_error("F?");
|
|
||||||
G = M->getGlobalVariable("g");
|
|
||||||
if (!G)
|
|
||||||
report_fatal_error("G?");
|
|
||||||
AliasedG = M->getNamedAlias("g_alias");
|
|
||||||
if (!AliasedG)
|
|
||||||
report_fatal_error("AliasedG?");
|
|
||||||
|
|
||||||
MachineModuleInfo MMI(TM.get());
|
|
||||||
|
|
||||||
MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
|
|
||||||
MMI.getContext(), 0);
|
|
||||||
|
|
||||||
DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);
|
|
||||||
if (!DAG)
|
|
||||||
report_fatal_error("DAG?");
|
|
||||||
OptimizationRemarkEmitter ORE(F);
|
|
||||||
FunctionAnalysisManager FAM;
|
|
||||||
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
|
|
||||||
|
|
||||||
TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
|
|
||||||
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
|
|
||||||
nullptr, TTI.hasBranchDivergence(F));
|
|
||||||
}
|
|
||||||
|
|
||||||
TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
|
|
||||||
return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
EVT getTypeToTransformTo(EVT VT) {
|
|
||||||
return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMContext Context;
|
|
||||||
std::unique_ptr<TargetMachine> TM;
|
|
||||||
std::unique_ptr<Module> M;
|
|
||||||
Function *F;
|
|
||||||
GlobalVariable *G;
|
|
||||||
GlobalAlias *AliasedG;
|
|
||||||
std::unique_ptr<MachineFunction> MF;
|
|
||||||
std::unique_ptr<SelectionDAG> DAG;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F(SelectionDAGPatternMatchTest, matchValueType) {
|
TEST_F(SelectionDAGPatternMatchTest, matchValueType) {
|
||||||
SDLoc DL;
|
SDLoc DL;
|
||||||
|
99
llvm/unittests/CodeGen/SelectionDAGTestBase.h
Normal file
99
llvm/unittests/CodeGen/SelectionDAGTestBase.h
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
//===---- llvm/unittest/CodeGen/SelectionDAGTestBase.h --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Shared gtest fixture that parses a small module and sets up a
// MachineFunction and an initialized SelectionDAG for unit tests.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_UNITTESTS_CODEGEN_SELECTIONDAGTESTBASE_H
#define LLVM_UNITTESTS_CODEGEN_SELECTIONDAGTESTBASE_H

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
#include <memory>
#include <string>

// NOTE: this using-directive is visible in every file that includes this
// header. It is kept so the fixture below can name LLVM types unqualified.
using namespace llvm;

/// Test fixture that owns a parsed module, a target machine, a MachineFunction
/// for @f and a SelectionDAG initialized on that function.
///
/// SetUp() skips the test (GTEST_SKIP) when the AArch64 target or its target
/// machine is unavailable, and fails the test when the embedded IR cannot be
/// parsed or the expected globals are missing.
class SelectionDAGTestBase : public testing::Test {
protected:
  static void SetUpTestCase() {
    InitializeAllTargets();
    InitializeAllTargetMCs();
  }

  void SetUp() override {
    StringRef Assembly = "@g = global i32 0\n"
                         "@g_alias = alias i32, i32* @g\n"
                         "define i32 @f() {\n"
                         " %1 = load i32, i32* @g\n"
                         " ret i32 %1\n"
                         "}";

    Triple TargetTriple("aarch64--");
    std::string Error;
    const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
    // FIXME: These tests do not depend on AArch64 specifically, but we have to
    // initialize a target. A skeleton Target for unittests would allow us to
    // always run these tests.
    if (!T)
      GTEST_SKIP();

    TargetOptions Options;
    TM = std::unique_ptr<TargetMachine>(
        T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt,
                               std::nullopt, CodeGenOptLevel::Aggressive));
    if (!TM)
      GTEST_SKIP();

    SMDiagnostic SMError;
    M = parseAssemblyString(Assembly, SMError, Context);
    // Stream the parser diagnostic so a failure says why parsing failed.
    ASSERT_TRUE(M != nullptr)
        << "Could not parse module: " << SMError.getMessage().str();
    M->setDataLayout(TM->createDataLayout());

    F = M->getFunction("f");
    ASSERT_TRUE(F != nullptr) << "Could not get function f!";
    G = M->getGlobalVariable("g");
    ASSERT_TRUE(G != nullptr) << "Could not get global g!";
    AliasedG = M->getNamedAlias("g_alias");
    ASSERT_TRUE(AliasedG != nullptr) << "Could not get alias g_alias!";

    // MMI and ORE are handed to MF / DAG by reference below, so they are
    // fixture members rather than SetUp() locals: they must stay alive for as
    // long as MF and DAG do.
    MMI = std::make_unique<MachineModuleInfo>(TM.get());

    MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
                                           MMI->getContext(), 0);

    // std::make_unique never returns null, so no null check is needed here.
    DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);
    ORE = std::make_unique<OptimizationRemarkEmitter>(F);
    FunctionAnalysisManager FAM;
    FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });

    TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
    DAG->init(*MF, *ORE, nullptr, nullptr, nullptr, nullptr, nullptr, *MMI,
              nullptr, TTI.hasBranchDivergence(F));
  }

  /// Returns the target lowering's type-legalization action for \p VT.
  TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
    return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
  }

  /// Returns the type the target lowering transforms \p VT to.
  EVT getTypeToTransformTo(EVT VT) {
    return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
  }

  // Members are destroyed in reverse declaration order, so DAG and MF go
  // first, then ORE and MMI, then the module, target machine and context.
  LLVMContext Context;
  std::unique_ptr<TargetMachine> TM;
  std::unique_ptr<Module> M;
  Function *F = nullptr;
  GlobalVariable *G = nullptr;
  GlobalAlias *AliasedG = nullptr;
  std::unique_ptr<MachineModuleInfo> MMI;
  std::unique_ptr<OptimizationRemarkEmitter> ORE;
  std::unique_ptr<MachineFunction> MF;
  std::unique_ptr<SelectionDAG> DAG;
};

#endif // LLVM_UNITTESTS_CODEGEN_SELECTIONDAGTESTBASE_H
|
Loading…
x
Reference in New Issue
Block a user