[X86] Prefer branchless code with sbb for abdu (#187783)

We can use the negate-if-carry trick for abdu, and it works on all types that are legal for sbb.
This commit is contained in:
SiliconA-Z 2026-03-22 07:07:41 -04:00 committed by GitHub
parent 5324c23d6c
commit a0d5508563
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 283 additions and 288 deletions

View File

@ -30074,13 +30074,28 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
bool IsSigned = Op.getOpcode() == ISD::ABDS;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.canUseCMOV() && VT.isScalarInteger()) {
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (VT.isScalarInteger()) {
// abdu(lhs, rhs) -> (sub(lhs,rhs) XOR mask) - mask
// where mask = carry ? -1 : 0 from the subtract.
// Branchless, no CMOV needed. Preferred for i8/i16 (no CMOV available for
// sub-i32 results) and when CMOV is unavailable.
if (!IsSigned && (VT.bitsLT(MVT::i32) || !Subtarget.canUseCMOV())) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Diff = DAG.getNode(X86ISD::SUB, dl, VTs, LHS, RHS);
SDValue Mask = DAG.getNode(
X86ISD::SETCC_CARRY, dl, VT,
DAG.getTargetConstant(X86::COND_B, dl, MVT::i8), Diff.getValue(1));
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Mask);
return DAG.getNode(ISD::SUB, dl, VT, Xor, Mask);
}
// For i32/i64 with CMOV: two subtracts + conditional move.
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
if (VT.bitsGE(MVT::i32)) {
if (Subtarget.canUseCMOV() && VT.bitsGE(MVT::i32)) {
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDValue LHS = DAG.getFreeze(Op.getOperand(0));
SDValue RHS = DAG.getFreeze(Op.getOperand(1));
@ -30091,20 +30106,25 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
Diff1.getValue(1));
}
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
MVT WideVT = MVT::getIntegerVT(WideBits);
if (TLI.isTypeLegal(WideVT)) {
SDVTList WideVTs = DAG.getVTList(WideVT, MVT::i32);
SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, WideVTs, LHS, RHS);
SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, WideVTs, RHS, LHS);
SDValue AbsDiff = DAG.getNode(X86ISD::CMOV, dl, WideVT, Diff1, Diff0,
DAG.getTargetConstant(CC, dl, MVT::i8),
Diff1.getValue(1));
return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
assert(IsSigned && "abdu should have been handled by carry-mask path");
// abds i8/i16 with CMOV: sign-extend to wider type, CMOV, truncate.
if (Subtarget.canUseCMOV()) {
unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
MVT WideVT = MVT::getIntegerVT(WideBits);
if (TLI.isTypeLegal(WideVT)) {
SDVTList WideVTs = DAG.getVTList(WideVT, MVT::i32);
SDValue LHS =
DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Op.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Op.getOperand(1));
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, WideVTs, LHS, RHS);
SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, WideVTs, RHS, LHS);
SDValue AbsDiff = DAG.getNode(
X86ISD::CMOV, dl, WideVT, Diff1, Diff0,
DAG.getTargetConstant(X86::COND_L, dl, MVT::i8), Diff1.getValue(1));
return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
}
}
}

View File

@ -9,25 +9,22 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_ext_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: negb %al
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %cl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: negb %al
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subb %sil, %dil
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorb %al, %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
@ -42,23 +39,24 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i8_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: negb %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8_i16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@ -74,25 +72,22 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_ext_i8_undef:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: negb %al
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %cl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8_undef:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: negb %al
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subb %sil, %dil
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorb %al, %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
@ -107,23 +102,22 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subw %si, %di
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorl %eax, %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
@ -169,23 +163,22 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i16_undef:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_undef:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subw %si, %di
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorl %eax, %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
@ -502,25 +495,22 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: negb %al
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %cl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: negb %al
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subb %sil, %dil
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorb %al, %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@ -532,23 +522,22 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subw %si, %di
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorl %eax, %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@ -719,25 +708,22 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_cmp_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: negb %al
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %cl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: negb %al
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subb %sil, %dil
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorb %al, %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ule i8 %a, %b
@ -750,23 +736,22 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_cmp_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subw %si, %di
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorl %eax, %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp ult i16 %a, %b

View File

@ -10,22 +10,21 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_ext_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorb %cl, %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subb %sil, %al
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
@ -39,22 +38,23 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i8_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8_i16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
@ -69,22 +69,21 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_ext_i8_undef:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorb %cl, %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i8_undef:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subb %sil, %al
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
@ -99,22 +98,22 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
@ -158,22 +157,22 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i16_undef:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_undef:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
@ -447,22 +446,21 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorb %cl, %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subb %sil, %al
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@ -475,22 +473,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@ -615,22 +613,21 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_cmp_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorb %cl, %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movzbl %sil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subb %dil, %al
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ugt i8 %a, %b
@ -644,22 +641,22 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_cmp_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp uge i16 %a, %b
@ -788,22 +785,21 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_select_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorb %cl, %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subb %sil, %al
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ult i8 %a, %b
@ -817,22 +813,22 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_select_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subw {{[0-9]+}}(%esp), %ax
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subw %si, %ax
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: xorl %ecx, %eax
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp ule i16 %a, %b

View File

@ -81,20 +81,16 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: subl %esi, %edx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setbe %bl
; X86-NEXT: leal -1(%ebx,%ebx), %esi
; X86-NEXT: ja .LBB1_2
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: .LBB1_2:
; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: xorl %edx, %eax
; X86-NEXT: subl %edx, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %esi, %eax
; X86-NEXT: addl %ecx, %eax
@ -689,16 +685,15 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_unsigned_reg_reg:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw %di, %si
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
; X64-NEXT: movzwl %di, %edx
; X64-NEXT: movzwl %si, %esi
; X64-NEXT: subl %edx, %esi
; X64-NEXT: cmovbl %eax, %esi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movl %edi, %edx
; X64-NEXT: subw %si, %dx
; X64-NEXT: setbe %cl
; X64-NEXT: leal -1(%rcx,%rcx), %ecx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: xorl %eax, %edx
; X64-NEXT: subl %eax, %edx
; X64-NEXT: movzwl %dx, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: addl %edi, %eax
@ -707,24 +702,26 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
;
; X86-LABEL: scalar_i16_unsigned_reg_reg:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: subw %dx, %ax
; X86-NEXT: setbe %bl
; X86-NEXT: leal -1(%ebx,%ebx), %edx
; X86-NEXT: ja .LBB11_2
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB11_2:
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: subw %si, %di
; X86-NEXT: setbe %dl
; X86-NEXT: leal -1(%edx,%edx), %edx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movzwl %di, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: popl %ebx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
%t3 = icmp ugt i16 %a1, %a2
%t4 = select i1 %t3, i16 -1, i16 1
@ -960,36 +957,33 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64: # %bb.0:
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %dil, %sil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subb %sil, %al
; X64-NEXT: seta %dl
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: orb $1, %cl
; X64-NEXT: movzbl %dil, %edx
; X64-NEXT: subl %esi, %edi
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: subl %edx, %eax
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: negb %dl
; X64-NEXT: orb $1, %dl
; X64-NEXT: xorb %cl, %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl
; X64-NEXT: addb %dl, %al
; X64-NEXT: mulb %dl
; X64-NEXT: addb %dil, %al
; X64-NEXT: retq
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: movb %cl, %al
; X86-NEXT: subb %ah, %al
; X86-NEXT: seta %dl
; X86-NEXT: ja .LBB16_2
; X86-NEXT: # %bb.1:
; X86-NEXT: subb %cl, %ah
; X86-NEXT: movb %ah, %al
; X86-NEXT: .LBB16_2:
; X86-NEXT: negb %dl
; X86-NEXT: orb $1, %dl
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %ah
; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: negb %ah
; X86-NEXT: orb $1, %ah
; X86-NEXT: xorb %dl, %al
; X86-NEXT: subb %dl, %al
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: mulb %ah
; X86-NEXT: addb %cl, %al
; X86-NEXT: retl
%t3 = icmp ugt i8 %a1, %a2