diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 27d4b8055d83..5ef9aa368082 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30074,13 +30074,28 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget, bool IsSigned = Op.getOpcode() == ISD::ABDS; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (Subtarget.canUseCMOV() && VT.isScalarInteger()) { - X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B; - unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + if (VT.isScalarInteger()) { + // abdu(lhs, rhs) -> (sub(lhs,rhs) XOR mask) - mask + // where mask = carry ? -1 : 0 from the subtract. + // Branchless, no CMOV needed. Preferred for i8/i16 (no CMOV available for + // sub-i32 results) and when CMOV is unavailable. + if (!IsSigned && (VT.bitsLT(MVT::i32) || !Subtarget.canUseCMOV())) { + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Diff = DAG.getNode(X86ISD::SUB, dl, VTs, LHS, RHS); + SDValue Mask = DAG.getNode( + X86ISD::SETCC_CARRY, dl, VT, + DAG.getTargetConstant(X86::COND_B, dl, MVT::i8), Diff.getValue(1)); + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Mask); + return DAG.getNode(ISD::SUB, dl, VT, Xor, Mask); + } + // For i32/i64 with CMOV: two subtracts + conditional move. // abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) // abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) - if (VT.bitsGE(MVT::i32)) { + if (Subtarget.canUseCMOV() && VT.bitsGE(MVT::i32)) { + X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B; SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue LHS = DAG.getFreeze(Op.getOperand(0)); SDValue RHS = DAG.getFreeze(Op.getOperand(1)); @@ -30091,20 +30106,25 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget, Diff1.getValue(1)); } - // abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs)))) - // abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs)))) - unsigned WideBits = std::max(2 * VT.getScalarSizeInBits(), 32u); - MVT WideVT = MVT::getIntegerVT(WideBits); - if (TLI.isTypeLegal(WideVT)) { - SDVTList WideVTs = DAG.getVTList(WideVT, MVT::i32); - SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0)); - SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1)); - SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, WideVTs, LHS, RHS); - SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, WideVTs, RHS, LHS); - SDValue AbsDiff = DAG.getNode(X86ISD::CMOV, dl, WideVT, Diff1, Diff0, - DAG.getTargetConstant(CC, dl, MVT::i8), - Diff1.getValue(1)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff); + assert(IsSigned && "abdu should have been handled by carry-mask path"); + + // abds i8/i16 with CMOV: sign-extend to wider type, CMOV, truncate. + if (Subtarget.canUseCMOV()) { + unsigned WideBits = std::max(2 * VT.getScalarSizeInBits(), 32u); + MVT WideVT = MVT::getIntegerVT(WideBits); + if (TLI.isTypeLegal(WideVT)) { + SDVTList WideVTs = DAG.getVTList(WideVT, MVT::i32); + SDValue LHS = + DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Op.getOperand(0)); + SDValue RHS = + DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Op.getOperand(1)); + SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, WideVTs, LHS, RHS); + SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, WideVTs, RHS, LHS); + SDValue AbsDiff = DAG.getNode( + X86ISD::CMOV, dl, WideVT, Diff1, Diff0, + DAG.getTargetConstant(X86::COND_L, dl, MVT::i8), Diff1.getValue(1)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff); + } } } diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll index b7c34070f1af..cebb42751dfa 100644 --- a/llvm/test/CodeGen/X86/abdu-neg.ll +++ b/llvm/test/CodeGen/X86/abdu-neg.ll @@ -9,25 +9,22 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_ext_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: negb %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax -; X64-NEXT: negb %al +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subb %sil, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorb %al, %dil +; X64-NEXT: subb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %aext = zext i8 %a to i64 @@ -42,23 +39,24 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i8_i16: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: negb %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8_i16: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: negb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -74,25 +72,22 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_ext_i8_undef: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: negb %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8_undef: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax -; X64-NEXT: negb %al +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subb %sil, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorb %al, %dil +; X64-NEXT: subb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %aext = zext i8 %a to i64 @@ -107,23 +102,22 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i16: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subw {{[0-9]+}}(%esp), %cx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovael %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: negl %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subw %si, %di +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorl %eax, %edi +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %aext = zext i16 %a to i64 @@ -169,23 +163,22 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i16_undef: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subw {{[0-9]+}}(%esp), %cx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovael %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i16_undef: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: negl %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subw %si, %di +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorl %eax, %edi +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %aext = zext i16 %a to i64 @@ -502,25 +495,22 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_minmax_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: negb %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_minmax_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax -; X64-NEXT: negb %al +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subb %sil, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorb %al, %dil +; X64-NEXT: subb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -532,23 +522,22 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_minmax_i16: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subw {{[0-9]+}}(%esp), %cx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovael %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_minmax_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: negl %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subw %si, %di +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorl %eax, %edi +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -719,25 +708,22 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_cmp_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: negb %al +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax -; X64-NEXT: negb %al +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subb %sil, %dil +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorb %al, %dil +; X64-NEXT: subb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp ule i8 %a, %b @@ -750,23 +736,22 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_cmp_i16: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subw {{[0-9]+}}(%esp), %cx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovael %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: negl %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subw %si, %di +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorl %eax, %edi +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp ult i16 %a, %b diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll index 043c9155f52f..b8bc3649773f 100644 --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -10,22 +10,21 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_ext_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subb %sil, %al +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %aext = zext i8 %a to i64 @@ -39,22 +38,23 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i8_i16: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8_i16: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %aext = zext i8 %a to i64 @@ -69,22 +69,21 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_ext_i8_undef: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i8_undef: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subb %sil, %al +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %aext = zext i8 %a to i64 @@ -99,22 +98,22 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i16: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %aext = zext i16 %a to i64 @@ -158,22 +157,22 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_ext_i16_undef: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_ext_i16_undef: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %aext = zext i16 %a to i64 @@ -447,22 +446,21 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_minmax_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: abd_minmax_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subb %sil, %al +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -475,22 +473,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_minmax_i16: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_minmax_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -615,22 +613,21 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_cmp_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: movzbl %sil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subb %dil, %al +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp ugt i8 %a, %b @@ -644,22 +641,22 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_cmp_i16: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_cmp_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp uge i16 %a, %b @@ -788,22 +785,21 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_select_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subb %sil, %al +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp ult i8 %a, %b @@ -817,22 +813,22 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_select_i16: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %eax, %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subw {{[0-9]+}}(%esp), %ax +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i16: ; X64: # %bb.0: -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: movzwl %di, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subw %si, %ax +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: xorl %ecx, %eax ; X64-NEXT: subl %ecx, %eax -; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp ule i16 %a, %b diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll index ffbb4bf06deb..4de448a099b3 100644 --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -81,20 +81,16 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: subl %esi, %edx +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %ebx, %ebx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-NEXT: setbe %bl ; X86-NEXT: leal -1(%ebx,%ebx), %esi -; X86-NEXT: ja .LBB1_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: negl %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: .LBB1_2: +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: subl %edx, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %esi, %eax ; X86-NEXT: addl %ecx, %eax @@ -689,16 +685,15 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; X64-LABEL: scalar_i16_unsigned_reg_reg: ; X64: # %bb.0: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %di, %si -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %ecx -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl %esi, %eax -; X64-NEXT: movzwl %di, %edx -; X64-NEXT: movzwl %si, %esi -; X64-NEXT: subl %edx, %esi -; X64-NEXT: cmovbl %eax, %esi -; X64-NEXT: movzwl %si, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: movl %edi, %edx +; X64-NEXT: subw %si, %dx +; X64-NEXT: setbe %cl +; X64-NEXT: leal -1(%rcx,%rcx), %ecx +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: xorl %eax, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: movzwl %dx, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: addl %edi, %eax @@ -707,24 +702,26 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; ; X86-LABEL: scalar_i16_unsigned_reg_reg: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: subw %dx, %ax -; X86-NEXT: setbe %bl -; X86-NEXT: leal -1(%ebx,%ebx), %edx -; X86-NEXT: ja .LBB11_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: negl %eax -; X86-NEXT: .LBB11_2: -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: subw %si, %di +; X86-NEXT: setbe %dl +; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: movzwl %di, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %ebx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl %t3 = icmp ugt i16 %a1, %a2 %t4 = select i1 %t3, i16 -1, i16 1 @@ -960,36 +957,33 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_unsigned_reg_reg: ; X64: # %bb.0: ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpb %dil, %sil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: seta %dl ; X64-NEXT: sbbl %ecx, %ecx -; X64-NEXT: orb $1, %cl -; X64-NEXT: movzbl %dil, %edx -; X64-NEXT: subl %esi, %edi -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: subl %edx, %eax -; X64-NEXT: cmovbl %edi, %eax +; X64-NEXT: negb %dl +; X64-NEXT: orb $1, %dl +; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %cl -; X64-NEXT: addb %dl, %al +; X64-NEXT: mulb %dl +; X64-NEXT: addb %dil, %al ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_unsigned_reg_reg: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movb {{[0-9]+}}(%esp), %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: subb %ah, %al -; X86-NEXT: seta %dl -; X86-NEXT: ja .LBB16_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: subb %cl, %ah -; X86-NEXT: movb %ah, %al -; X86-NEXT: .LBB16_2: -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: seta %ah +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: negb %ah +; X86-NEXT: orb $1, %ah +; X86-NEXT: xorb %dl, %al +; X86-NEXT: subb %dl, %al ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: mulb %ah ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %t3 = icmp ugt i8 %a1, %a2