From d6957d2140c96dd5d821c3dd69712be0edd38cee Mon Sep 17 00:00:00 2001 From: Takashi Idobe Date: Wed, 25 Mar 2026 18:48:38 -0400 Subject: [PATCH] [X86] fuse constant addition after sbb (#184541) Resolves: https://github.com/llvm/llvm-project/issues/171676 Related: https://github.com/llvm/llvm-project/pull/185117 (AArch64 side) The issue points out that `Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)` is optimized and that SBB can be optimized similarly: `Fold ADD(SBB(Y,0,W),C) -> SBB(Y,-C,W)`. With the changes from this branch, a new clang will compile the example code: ```c #include <stdint.h> uint64_t f(uint64_t a, uint64_t b) { uint64_t x; x += __builtin_add_overflow(a, b, &x); return x + 10; } uint64_t g(uint64_t a, uint64_t b) { uint64_t x; x -= __builtin_sub_overflow(a, b, &x); return x + 10; } ``` The sub case is now optimized as well; instead of emitting a separate leaq on x86, the constant is folded into the sbb: ```asm f: movq %rdi, %rax addq %rsi, %rax adcq $10, %rax retq g: movq %rdi, %rax subq %rsi, %rax sbbq $-10, %rax retq ``` --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 ++ .../CodeGen/X86/apx/long-instruction-fixup.ll | 8 +- llvm/test/CodeGen/X86/apx/sbb.ll | 189 ++++++----------- llvm/test/CodeGen/X86/sbb-add-constant.ll | 191 ++++++++++++++++++ llvm/test/CodeGen/X86/select_const.ll | 4 +- 5 files changed, 274 insertions(+), 135 deletions(-) create mode 100644 llvm/test/CodeGen/X86/sbb-add-constant.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fc6c8954691c..25fe4bcac8c6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -59430,6 +59430,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, Op0.getOperand(0), Op0.getOperand(2)); } + // Fold ADD(SBB(Y,0,W),C) -> SBB(Y,-C,W) + // SBB(Y,0,W) = Y - 0 - CF = Y - CF; adding C gives Y - CF + C = Y - (-C) - + // CF. The SBB flags output must be dead: changing the subtrahend from 0 to -C + // produces different EFLAGS bits. 
+ SDValue SBB = Op0; + SDValue C = Op1; + if (SBB.getOpcode() != X86ISD::SBB) + std::swap(SBB, C); + if (SBB.getOpcode() == X86ISD::SBB && SBB->hasOneUse() && + X86::isZeroNode(SBB.getOperand(1)) && !SBB->hasAnyUseOfValue(1)) { + SDLoc SBBLoc(SBB); + return DAG + .getNode(X86ISD::SBB, SBBLoc, SBB->getVTList(), SBB.getOperand(0), + DAG.getNegative(C, SBBLoc, VT), SBB.getOperand(2)) + .getValue(0); + } + if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget)) return IFMA52; diff --git a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll index 8d8b306f6893..d9d47674d7d1 100644 --- a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll +++ b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll @@ -169,8 +169,8 @@ define i32 @sbb32mi_GS(i32 %x, i32 %y) { ; CHECK-LABEL: sbb32mi_GS: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: sbbl $0, %gs:255, %eax -; CHECK-NEXT: addl $-123456, %eax # imm = 0xFFFE1DC0 +; CHECK-NEXT: movl %gs:255, %eax +; CHECK-NEXT: sbbl $123456, %eax # imm = 0x1E240 ; CHECK-NEXT: retq entry: %a= inttoptr i32 255 to ptr addrspace(256) @@ -186,8 +186,8 @@ define i64 @sbb64mi_FS(i64 %x, i64 %y) { ; CHECK-LABEL: sbb64mi_FS: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: sbbq $0, %fs:255, %rax -; CHECK-NEXT: addq $-123456, %rax # imm = 0xFFFE1DC0 +; CHECK-NEXT: movq %fs:255, %rax +; CHECK-NEXT: sbbq $123456, %rax # imm = 0x1E240 ; CHECK-NEXT: retq entry: %a= inttoptr i64 255 to ptr addrspace(257) diff --git a/llvm/test/CodeGen/X86/apx/sbb.ll b/llvm/test/CodeGen/X86/apx/sbb.ll index 5bf4fc009323..d38ac8916895 100644 --- a/llvm/test/CodeGen/X86/apx/sbb.ll +++ b/llvm/test/CodeGen/X86/apx/sbb.ll @@ -172,8 +172,7 @@ define i16 @sbb16ri8(i16 %a, i16 %x, i16 %y) nounwind { ; CHECK-LABEL: sbb16ri8: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; CHECK-NEXT: sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00] -; 
CHECK-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85] +; CHECK-NEXT: sbbw $123, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i16 %a, 123 %k = icmp ugt i16 %x, %y @@ -186,8 +185,7 @@ define i32 @sbb32ri8(i32 %a, i32 %x, i32 %y) nounwind { ; CHECK-LABEL: sbb32ri8: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; CHECK-NEXT: sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00] -; CHECK-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85] +; CHECK-NEXT: sbbl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i32 %a, 123 %k = icmp ugt i32 %x, %y @@ -197,26 +195,11 @@ define i32 @sbb32ri8(i32 %a, i32 %x, i32 %y) nounwind { } define i64 @sbb64ri8(i64 %a, i64 %x, i64 %y) nounwind { -; NDD-LABEL: sbb64ri8: -; NDD: # %bb.0: -; NDD-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; NDD-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; NDD-NEXT: addq $-123, %rax # encoding: [0x48,0x83,0xc0,0x85] -; NDD-NEXT: retq # encoding: [0xc3] -; -; IMM-LABEL: sbb64ri8: -; IMM: # %bb.0: -; IMM-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; IMM-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; IMM-NEXT: addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85] -; IMM-NEXT: retq # encoding: [0xc3] -; -; MEMONLY-LABEL: sbb64ri8: -; MEMONLY: # %bb.0: -; MEMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; MEMONLY-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; MEMONLY-NEXT: addq $-123, %rax # encoding: [0x48,0x83,0xc0,0x85] -; MEMONLY-NEXT: retq # encoding: [0xc3] +; CHECK-LABEL: sbb64ri8: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] +; CHECK-NEXT: sbbq $123, %rdi, %rax # encoding: 
[0x62,0xf4,0xfc,0x18,0x83,0xdf,0x7b] +; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i64 %a, 123 %k = icmp ugt i64 %x, %y %z = zext i1 %k to i64 @@ -228,8 +211,7 @@ define i8 @sbb8ri(i8 %a, i8 %x, i8 %y) nounwind { ; CHECK-LABEL: sbb8ri: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2] -; CHECK-NEXT: sbbb $0, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x00] -; CHECK-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85] +; CHECK-NEXT: sbbb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xdf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i8 %a, 123 %k = icmp ugt i8 %x, %y @@ -242,9 +224,8 @@ define i16 @sbb16ri(i16 %a, i16 %x, i16 %y) nounwind { ; CHECK-LABEL: sbb16ri: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; CHECK-NEXT: sbbw $0, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xdf,0x00] -; CHECK-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb] -; CHECK-NEXT: # imm = 0xFB2E +; CHECK-NEXT: sbbw $1234, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0xdf,0xd2,0x04] +; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i16 %a, 1234 %k = icmp ugt i16 %x, %y @@ -257,9 +238,8 @@ define i32 @sbb32ri(i32 %a, i32 %x, i32 %y) nounwind { ; CHECK-LABEL: sbb32ri: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; CHECK-NEXT: sbbl $0, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xdf,0x00] -; CHECK-NEXT: addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff] -; CHECK-NEXT: # imm = 0xFFFE1DC0 +; CHECK-NEXT: sbbl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00] +; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i32 %a, 123456 %k = icmp ugt i32 %x, %y @@ -269,29 +249,12 @@ define i32 @sbb32ri(i32 %a, i32 %x, i32 %y) nounwind { } define i64 @sbb64ri(i64 %a, i64 %x, i64 %y) nounwind { -; NDD-LABEL: sbb64ri: 
-; NDD: # %bb.0: -; NDD-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; NDD-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; NDD-NEXT: addq $-123456, %rax # encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; NDD-NEXT: # imm = 0xFFFE1DC0 -; NDD-NEXT: retq # encoding: [0xc3] -; -; IMM-LABEL: sbb64ri: -; IMM: # %bb.0: -; IMM-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; IMM-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; IMM-NEXT: addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; IMM-NEXT: # imm = 0xFFFE1DC0 -; IMM-NEXT: retq # encoding: [0xc3] -; -; MEMONLY-LABEL: sbb64ri: -; MEMONLY: # %bb.0: -; MEMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; MEMONLY-NEXT: sbbq $0, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xdf,0x00] -; MEMONLY-NEXT: addq $-123456, %rax # encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; MEMONLY-NEXT: # imm = 0xFFFE1DC0 -; MEMONLY-NEXT: retq # encoding: [0xc3] +; CHECK-LABEL: sbb64ri: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] +; CHECK-NEXT: sbbq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xdf,0x40,0xe2,0x01,0x00] +; CHECK-NEXT: # imm = 0x1E240 +; CHECK-NEXT: retq # encoding: [0xc3] %s = sub i64 %a, 123456 %k = icmp ugt i64 %x, %y %z = zext i1 %k to i64 @@ -416,23 +379,20 @@ define i16 @sbb16mi8(ptr %ptr, i16 %x, i16 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; NDD-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; NDD-NEXT: sbbw $0, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x00] -; NDD-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85] +; NDD-NEXT: sbbw $123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x7b] ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb16mi8: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; 
IMMONLY-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; IMMONLY-NEXT: sbbw $0, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x00] -; IMMONLY-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85] +; IMMONLY-NEXT: sbbw $123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x7b] ; IMMONLY-NEXT: retq # encoding: [0xc3] ; ; MEM-LABEL: sbb16mi8: ; MEM: # %bb.0: ; MEM-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; MEM-NEXT: sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00] -; MEM-NEXT: addw $-123, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xc0,0x85] +; MEM-NEXT: sbbw $123, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x7b] ; MEM-NEXT: retq # encoding: [0xc3] %a = load i16, ptr %ptr %s = sub i16 %a, 123 @@ -447,23 +407,20 @@ define i32 @sbb32mi8(ptr %ptr, i32 %x, i32 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] ; NDD-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; NDD-NEXT: sbbl $0, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x00] -; NDD-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85] +; NDD-NEXT: sbbl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x7b] ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb32mi8: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] ; IMMONLY-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; IMMONLY-NEXT: sbbl $0, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x00] -; IMMONLY-NEXT: addl $-123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x85] +; IMMONLY-NEXT: sbbl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x7b] ; IMMONLY-NEXT: retq # encoding: [0xc3] ; ; MEM-LABEL: sbb32mi8: ; MEM: # %bb.0: ; MEM-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; MEM-NEXT: sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00] -; MEM-NEXT: addl $-123, %eax # EVEX TO LEGACY 
Compression encoding: [0x83,0xc0,0x85] +; MEM-NEXT: sbbl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x7b] ; MEM-NEXT: retq # encoding: [0xc3] %a = load i32, ptr %ptr %s = sub i32 %a, 123 @@ -478,31 +435,21 @@ define i64 @sbb64mi8(ptr %ptr, i64 %x, i64 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] ; NDD-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; NDD-NEXT: sbbq $0, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x00] -; NDD-NEXT: addq $-123, %rax # encoding: [0x48,0x83,0xc0,0x85] +; NDD-NEXT: sbbq $123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x7b] ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb64mi8: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] ; IMMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; IMMONLY-NEXT: sbbq $0, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x00] -; IMMONLY-NEXT: addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85] +; IMMONLY-NEXT: sbbq $123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x7b] ; IMMONLY-NEXT: retq # encoding: [0xc3] ; -; MEMONLY-LABEL: sbb64mi8: -; MEMONLY: # %bb.0: -; MEMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; MEMONLY-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00] -; MEMONLY-NEXT: addq $-123, %rax # encoding: [0x48,0x83,0xc0,0x85] -; MEMONLY-NEXT: retq # encoding: [0xc3] -; -; BOTH-LABEL: sbb64mi8: -; BOTH: # %bb.0: -; BOTH-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; BOTH-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00] -; BOTH-NEXT: addq $-123, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xc0,0x85] -; BOTH-NEXT: retq # encoding: [0xc3] +; MEM-LABEL: sbb64mi8: +; MEM: # %bb.0: +; MEM-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] +; MEM-NEXT: sbbq $123, (%rdi), %rax # encoding: 
[0x62,0xf4,0xfc,0x18,0x83,0x1f,0x7b] +; MEM-NEXT: retq # encoding: [0xc3] %a = load i64, ptr %ptr %s = sub i64 %a, 123 %k = icmp ugt i64 %x, %y @@ -516,23 +463,20 @@ define i8 @sbb8mi(ptr %ptr, i8 %x, i8 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movzbl (%rdi), %eax # encoding: [0x0f,0xb6,0x07] ; NDD-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2] -; NDD-NEXT: sbbb $0, %al # EVEX TO LEGACY Compression encoding: [0x1c,0x00] -; NDD-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85] +; NDD-NEXT: sbbb $123, %al # EVEX TO LEGACY Compression encoding: [0x1c,0x7b] ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb8mi: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movzbl (%rdi), %eax # encoding: [0x0f,0xb6,0x07] ; IMMONLY-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2] -; IMMONLY-NEXT: sbbb $0, %al # EVEX TO LEGACY Compression encoding: [0x1c,0x00] -; IMMONLY-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85] +; IMMONLY-NEXT: sbbb $123, %al # EVEX TO LEGACY Compression encoding: [0x1c,0x7b] ; IMMONLY-NEXT: retq # encoding: [0xc3] ; ; MEM-LABEL: sbb8mi: ; MEM: # %bb.0: ; MEM-NEXT: cmpb %sil, %dl # encoding: [0x40,0x38,0xf2] -; MEM-NEXT: sbbb $0, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x00] -; MEM-NEXT: addb $-123, %al # EVEX TO LEGACY Compression encoding: [0x04,0x85] +; MEM-NEXT: sbbb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x1f,0x7b] ; MEM-NEXT: retq # encoding: [0xc3] %a = load i8, ptr %ptr %s = sub i8 %a, 123 @@ -547,26 +491,23 @@ define i16 @sbb16mi(ptr %ptr, i16 %x, i16 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; NDD-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; NDD-NEXT: sbbw $0, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x00] -; NDD-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb] -; NDD-NEXT: # imm = 0xFB2E +; NDD-NEXT: sbbw $1234, %ax # EVEX TO LEGACY Compression encoding: 
[0x66,0x1d,0xd2,0x04] +; NDD-NEXT: # imm = 0x4D2 ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb16mi: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; IMMONLY-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; IMMONLY-NEXT: sbbw $0, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xd8,0x00] -; IMMONLY-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb] -; IMMONLY-NEXT: # imm = 0xFB2E +; IMMONLY-NEXT: sbbw $1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x1d,0xd2,0x04] +; IMMONLY-NEXT: # imm = 0x4D2 ; IMMONLY-NEXT: retq # encoding: [0xc3] ; ; MEM-LABEL: sbb16mi: ; MEM: # %bb.0: ; MEM-NEXT: cmpw %si, %dx # encoding: [0x66,0x39,0xf2] -; MEM-NEXT: sbbw $0, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0x1f,0x00] -; MEM-NEXT: addw $-1234, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x05,0x2e,0xfb] -; MEM-NEXT: # imm = 0xFB2E +; MEM-NEXT: sbbw $1234, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0x1f,0xd2,0x04] +; MEM-NEXT: # imm = 0x4D2 ; MEM-NEXT: retq # encoding: [0xc3] %a = load i16, ptr %ptr %s = sub i16 %a, 1234 @@ -581,26 +522,23 @@ define i32 @sbb32mi(ptr %ptr, i32 %x, i32 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] ; NDD-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; NDD-NEXT: sbbl $0, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x00] -; NDD-NEXT: addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff] -; NDD-NEXT: # imm = 0xFFFE1DC0 +; NDD-NEXT: sbbl $123456, %eax # EVEX TO LEGACY Compression encoding: [0x1d,0x40,0xe2,0x01,0x00] +; NDD-NEXT: # imm = 0x1E240 ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb32mi: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] ; IMMONLY-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; IMMONLY-NEXT: sbbl $0, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xd8,0x00] -; IMMONLY-NEXT: addl $-123456, 
%eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff] -; IMMONLY-NEXT: # imm = 0xFFFE1DC0 +; IMMONLY-NEXT: sbbl $123456, %eax # EVEX TO LEGACY Compression encoding: [0x1d,0x40,0xe2,0x01,0x00] +; IMMONLY-NEXT: # imm = 0x1E240 ; IMMONLY-NEXT: retq # encoding: [0xc3] ; ; MEM-LABEL: sbb32mi: ; MEM: # %bb.0: ; MEM-NEXT: cmpl %esi, %edx # encoding: [0x39,0xf2] -; MEM-NEXT: sbbl $0, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x1f,0x00] -; MEM-NEXT: addl $-123456, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xc0,0x1d,0xfe,0xff] -; MEM-NEXT: # imm = 0xFFFE1DC0 +; MEM-NEXT: sbbl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00] +; MEM-NEXT: # imm = 0x1E240 ; MEM-NEXT: retq # encoding: [0xc3] %a = load i32, ptr %ptr %s = sub i32 %a, 123456 @@ -615,35 +553,24 @@ define i64 @sbb64mi(ptr %ptr, i64 %x, i64 %y) nounwind { ; NDD: # %bb.0: ; NDD-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] ; NDD-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; NDD-NEXT: sbbq $0, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x00] -; NDD-NEXT: addq $-123456, %rax # encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; NDD-NEXT: # imm = 0xFFFE1DC0 +; NDD-NEXT: sbbq $123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x1d,0x40,0xe2,0x01,0x00] +; NDD-NEXT: # imm = 0x1E240 ; NDD-NEXT: retq # encoding: [0xc3] ; ; IMMONLY-LABEL: sbb64mi: ; IMMONLY: # %bb.0: ; IMMONLY-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] ; IMMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; IMMONLY-NEXT: sbbq $0, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xd8,0x00] -; IMMONLY-NEXT: addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; IMMONLY-NEXT: # imm = 0xFFFE1DC0 +; IMMONLY-NEXT: sbbq $123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x1d,0x40,0xe2,0x01,0x00] +; IMMONLY-NEXT: # imm = 0x1E240 ; IMMONLY-NEXT: retq # encoding: [0xc3] ; -; MEMONLY-LABEL: sbb64mi: 
-; MEMONLY: # %bb.0: -; MEMONLY-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; MEMONLY-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00] -; MEMONLY-NEXT: addq $-123456, %rax # encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; MEMONLY-NEXT: # imm = 0xFFFE1DC0 -; MEMONLY-NEXT: retq # encoding: [0xc3] -; -; BOTH-LABEL: sbb64mi: -; BOTH: # %bb.0: -; BOTH-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] -; BOTH-NEXT: sbbq $0, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x1f,0x00] -; BOTH-NEXT: addq $-123456, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,0xc0,0x1d,0xfe,0xff] -; BOTH-NEXT: # imm = 0xFFFE1DC0 -; BOTH-NEXT: retq # encoding: [0xc3] +; MEM-LABEL: sbb64mi: +; MEM: # %bb.0: +; MEM-NEXT: cmpq %rsi, %rdx # encoding: [0x48,0x39,0xf2] +; MEM-NEXT: sbbq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x1f,0x40,0xe2,0x01,0x00] +; MEM-NEXT: # imm = 0x1E240 +; MEM-NEXT: retq # encoding: [0xc3] %a = load i64, ptr %ptr %s = sub i64 %a, 123456 %k = icmp ugt i64 %x, %y @@ -711,3 +638,7 @@ define void @sbb64mr_legacy(i64 %a, ptr %ptr, i64 %x, i64 %y) nounwind { store i64 %r, ptr %ptr ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; BOTH: {{.*}} +; IMM: {{.*}} +; MEMONLY: {{.*}} diff --git a/llvm/test/CodeGen/X86/sbb-add-constant.ll b/llvm/test/CodeGen/X86/sbb-add-constant.ll new file mode 100644 index 000000000000..5a4f547ca589 --- /dev/null +++ b/llvm/test/CodeGen/X86/sbb-add-constant.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; +; Verify that ADD(SBB(Y,0,flags),C) folds to SBB(Y,-C,flags). +; SBB(Y,0) = Y - CF; adding C gives Y - CF + C = Y - (-C) - CF = SBB(Y,-C). 
+; + +; Fold should fire because all conditions are met +define i64 @g_i64(i64 %a, i64 %b) { +; CHECK-LABEL: g_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: sbbq $-10, %rax +; CHECK-NEXT: retq + %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %ov, 0 + %bit = extractvalue { i64, i1 } %ov, 1 + %ext = sext i1 %bit to i64 + %r = add i64 %val, %ext + %r2 = add i64 %r, 10 + ret i64 %r2 +} + +; Fold should fire because all conditions are met +define i32 @g_i32(i32 %a, i32 %b) { +; CHECK-LABEL: g_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: sbbl $-10, %eax +; CHECK-NEXT: retq + %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %ov, 0 + %bit = extractvalue { i32, i1 } %ov, 1 + %ext = sext i1 %bit to i32 + %r = add i32 %val, %ext + %r2 = add i32 %r, 10 + ret i32 %r2 +} + +; Non-constant addend, fold should still fire. +define i64 @g_nonconstant(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: g_nonconstant: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: sbbq %rdx, %rax +; CHECK-NEXT: retq + %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %ov, 0 + %bit = extractvalue { i64, i1 } %ov, 1 + %ext = sext i1 %bit to i64 + %r = add i64 %val, %ext + %r2 = add i64 %r, %c + ret i64 %r2 +} + +; Non-constant addend, fold should still fire. 
+define i32 @g_nonconstant_i32(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: g_nonconstant_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %edx +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: sbbl %edx, %eax +; CHECK-NEXT: retq + %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %ov, 0 + %bit = extractvalue { i32, i1 } %ov, 1 + %ext = sext i1 %bit to i32 + %r = add i32 %val, %ext + %r2 = add i32 %r, %c + ret i32 %r2 +} + +; Non-constant addend in commuted form, fold should still fire. +define i64 @g_nonconstant_commuted(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: g_nonconstant_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: sbbq %rdx, %rax +; CHECK-NEXT: retq + %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %ov, 0 + %bit = extractvalue { i64, i1 } %ov, 1 + %ext = sext i1 %bit to i64 + %r = add i64 %val, %ext + %r2 = add i64 %c, %r + ret i64 %r2 +} + +; Non-constant addend in commuted form, fold should still fire. +define i32 @g_nonconstant_commuted_i32(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: g_nonconstant_commuted_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %edx +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: sbbl %edx, %eax +; CHECK-NEXT: retq + %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %ov, 0 + %bit = extractvalue { i32, i1 } %ov, 1 + %ext = sext i1 %bit to i32 + %r = add i32 %val, %ext + %r2 = add i32 %c, %r + ret i32 %r2 +} + +; INT_MIN should fold correctly too. 
+define i32 @g_i32_int_min(i32 %a, i32 %b) { +; CHECK-LABEL: g_i32_int_min: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: sbbl $-2147483648, %eax # imm = 0x80000000 +; CHECK-NEXT: retq + %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %ov, 0 + %bit = extractvalue { i32, i1 } %ov, 1 + %ext = sext i1 %bit to i32 + %r = add i32 %val, %ext + %r2 = add i32 %r, -2147483648 + ret i32 %r2 +} + +; Multiple uses of SBB result, fold should not fire +define i64 @g_multi_use(i64 %a, i64 %b, ptr %out) { +; CHECK-LABEL: g_multi_use: +; CHECK: # %bb.0: +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: leaq 10(%rdi), %rax +; CHECK-NEXT: retq + %ov = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %ov, 0 + %bit = extractvalue { i64, i1 } %ov, 1 + %ext = sext i1 %bit to i64 + %sbb = add i64 %val, %ext + store i64 %sbb, ptr %out + %r = add i64 %sbb, 10 + ret i64 %r +} + +; Multiple uses of SBB result, fold should not fire +define i32 @g_multi_use_i32(i32 %a, i32 %b, ptr %out) { +; CHECK-LABEL: g_multi_use_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: sbbl $0, %edi +; CHECK-NEXT: movl %edi, (%rdx) +; CHECK-NEXT: leal 10(%rdi), %eax +; CHECK-NEXT: retq + %ov = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %ov, 0 + %bit = extractvalue { i32, i1 } %ov, 1 + %ext = sext i1 %bit to i32 + %sbb = add i32 %val, %ext + store i32 %sbb, ptr %out + %r = add i32 %sbb, 10 + ret i32 %r +} + +; Flags live across the low-limb constant add into the next SBB in the chain. +; Fold should not fire. 
+define {i64, i64} @g_flags_live(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) { +; CHECK-LABEL: g_flags_live: +; CHECK: # %bb.0: +; CHECK-NEXT: subq %rdx, %rdi +; CHECK-NEXT: leaq 10(%rdi), %rax +; CHECK-NEXT: sbbq %rcx, %rsi +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: retq + %lo = call { i8, i64 } @llvm.x86.subborrow.64(i8 0, i64 %a_lo, i64 %b_lo) + %lo_b = extractvalue { i8, i64 } %lo, 0 + %lo_val = extractvalue { i8, i64 } %lo, 1 + %lo_plus = add i64 %lo_val, 10 + + %hi = call { i8, i64 } @llvm.x86.subborrow.64(i8 %lo_b, i64 %a_hi, i64 %b_hi) + %hi_val = extractvalue { i8, i64 } %hi, 1 + + %ret = insertvalue {i64, i64} poison, i64 %lo_plus, 0 + %ret2 = insertvalue {i64, i64} %ret, i64 %hi_val, 1 + ret {i64, i64} %ret2 +} diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 35f4655dd6d7..a7da07f1ae5d 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -439,9 +439,9 @@ define i64 @sel_1_2(i64 %x, i64 %y) { ; ; X64-LABEL: sel_1_2: ; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: cmpq $42, %rdi -; X64-NEXT: sbbq $0, %rsi -; X64-NEXT: leaq 2(%rsi), %rax +; X64-NEXT: sbbq $-2, %rax ; X64-NEXT: retq %cmp = icmp ult i64 %x, 42 %sel = select i1 %cmp, i64 1, i64 2