[ARM] Try to lower sign bit SELECT_CC to shift (#186349)

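Lower a `x < 0 ? 1 : 0` style SELECT_CC to a logical shift right by
`bw-1` (`x >> (bw-1)`, where `bw` is the bit width of `x`), which moves
the sign bit into bit 0. This will become more important with an
upcoming change, but also appears to be somewhat useful by itself.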
This commit is contained in:
Nikita Popov 2026-03-16 09:32:43 +01:00 committed by GitHub
parent d8386dbe7d
commit f894e8e92d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 95 additions and 138 deletions

View File

@ -5199,6 +5199,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
}
// (SELECT_CC setlt, x, 0, 1, 0) -> SRL(x, bw-1)
if (CC == ISD::SETLT && isNullConstant(RHS) && isOneConstant(TrueVal) &&
isNullConstant(FalseVal) && LHS.getValueType() == VT)
return DAG.getNode(ISD::SRL, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
}
if (LHS.getValueType() == MVT::i32) {

View File

@ -966,48 +966,15 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
;------------------------------------------------------------------------------;
define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; ARM6: @ %bb.0:
; ARM6-NEXT: uxtb r1, r1
; ARM6-NEXT: mov r2, #24
; ARM6-NEXT: ands r0, r0, r2, lsr r1
; ARM6-NEXT: mov r0, #0
; ARM6-NEXT: movmi r0, #1
; ARM6-NEXT: bx lr
; ARM-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; ARM: @ %bb.0:
; ARM-NEXT: mov r0, #0
; ARM-NEXT: bx lr
;
; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; ARM78: @ %bb.0:
; ARM78-NEXT: uxtb r1, r1
; ARM78-NEXT: mov r2, #24
; ARM78-NEXT: ands r0, r0, r2, lsr r1
; ARM78-NEXT: mov r0, #0
; ARM78-NEXT: movwmi r0, #1
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
; THUMB6-NEXT: movs r2, #24
; THUMB6-NEXT: lsrs r2, r1
; THUMB6-NEXT: ands r2, r0
; THUMB6-NEXT: bmi .LBB20_2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: movs r0, #0
; THUMB6-NEXT: bx lr
; THUMB6-NEXT: .LBB20_2:
; THUMB6-NEXT: movs r0, #1
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxtb r1, r1
; THUMB78-NEXT: movs r2, #24
; THUMB78-NEXT: lsr.w r1, r2, r1
; THUMB78-NEXT: ands r0, r1
; THUMB78-NEXT: mov.w r0, #0
; THUMB78-NEXT: it mi
; THUMB78-NEXT: movmi r0, #1
; THUMB78-NEXT: bx lr
; THUMB-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; THUMB: @ %bb.0:
; THUMB-NEXT: movs r0, #0
; THUMB-NEXT: bx lr
%t0 = lshr i8 24, %y
%t1 = and i8 %t0, %x
%res = icmp slt i8 %t1, 0

View File

@ -1010,10 +1010,8 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; ARM6-NEXT: uxtb r1, r1
; ARM6-NEXT: mov r2, #24
; ARM6-NEXT: and r0, r0, r2, lsl r1
; ARM6-NEXT: sxtb r1, r0
; ARM6-NEXT: mov r0, #0
; ARM6-NEXT: cmp r1, #0
; ARM6-NEXT: movmi r0, #1
; ARM6-NEXT: mov r1, #1
; ARM6-NEXT: and r0, r1, r0, lsr #7
; ARM6-NEXT: bx lr
;
; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
@ -1021,10 +1019,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; ARM78-NEXT: uxtb r1, r1
; ARM78-NEXT: mov r2, #24
; ARM78-NEXT: and r0, r0, r2, lsl r1
; ARM78-NEXT: sxtb r1, r0
; ARM78-NEXT: mov r0, #0
; ARM78-NEXT: cmp r1, #0
; ARM78-NEXT: movwmi r0, #1
; ARM78-NEXT: ubfx r0, r0, #7, #1
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
@ -1033,14 +1028,8 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; THUMB6-NEXT: movs r2, #24
; THUMB6-NEXT: lsls r2, r1
; THUMB6-NEXT: ands r2, r0
; THUMB6-NEXT: sxtb r0, r2
; THUMB6-NEXT: cmp r0, #0
; THUMB6-NEXT: bmi .LBB20_2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: movs r0, #0
; THUMB6-NEXT: bx lr
; THUMB6-NEXT: .LBB20_2:
; THUMB6-NEXT: movs r0, #1
; THUMB6-NEXT: lsls r0, r2, #24
; THUMB6-NEXT: lsrs r0, r0, #31
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
@ -1049,11 +1038,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; THUMB78-NEXT: movs r2, #24
; THUMB78-NEXT: lsl.w r1, r2, r1
; THUMB78-NEXT: ands r0, r1
; THUMB78-NEXT: sxtb r1, r0
; THUMB78-NEXT: movs r0, #0
; THUMB78-NEXT: cmp r1, #0
; THUMB78-NEXT: it mi
; THUMB78-NEXT: movmi r0, #1
; THUMB78-NEXT: ubfx r0, r0, #7, #1
; THUMB78-NEXT: bx lr
%t0 = shl i8 24, %y
%t1 = and i8 %t0, %x

View File

@ -421,69 +421,68 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r7
; ARM-NEXT: adds r3, r0, r7
; ARM-NEXT: adcs r1, r6
; ARM-NEXT: rsbs r5, r1, #0
; ARM-NEXT: adcs r5, r1
; ARM-NEXT: movs r2, #1
; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: cmp r0, #0
; ARM-NEXT: mov r3, r2
; ARM-NEXT: bge .LBB6_2
; ARM-NEXT: @ %bb.1:
; ARM-NEXT: mov r3, r4
; ARM-NEXT: .LBB6_2:
; ARM-NEXT: mov r6, r2
; ARM-NEXT: bmi .LBB6_4
; ARM-NEXT: @ %bb.3:
; ARM-NEXT: mov r6, r4
; ARM-NEXT: .LBB6_4:
; ARM-NEXT: ands r5, r6
; ARM-NEXT: rsbs r0, r1, #0
; ARM-NEXT: adcs r0, r1
; ARM-NEXT: lsrs r2, r3, #31
; ARM-NEXT: ands r2, r0
; ARM-NEXT: movs r0, #1
; ARM-NEXT: cmp r1, #0
; ARM-NEXT: mov r7, r2
; ARM-NEXT: bgt .LBB6_6
; ARM-NEXT: @ %bb.5:
; ARM-NEXT: mov r7, r4
; ARM-NEXT: .LBB6_6:
; ARM-NEXT: orrs r7, r5
; ARM-NEXT: mov r5, r0
; ARM-NEXT: bgt .LBB6_2
; ARM-NEXT: @ %bb.1:
; ARM-NEXT: mov r5, r4
; ARM-NEXT: .LBB6_2:
; ARM-NEXT: orrs r5, r2
; ARM-NEXT: mvns r6, r4
; ARM-NEXT: cmp r7, #0
; ARM-NEXT: beq .LBB6_8
; ARM-NEXT: @ %bb.7:
; ARM-NEXT: ldr r0, .LCPI6_0
; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: .LBB6_8:
; ARM-NEXT: cmp r5, #0
; ARM-NEXT: bne .LBB6_4
; ARM-NEXT: @ %bb.3:
; ARM-NEXT: str r3, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r5, r6
; ARM-NEXT: bne .LBB6_10
; ARM-NEXT: @ %bb.9:
; ARM-NEXT: beq .LBB6_5
; ARM-NEXT: b .LBB6_6
; ARM-NEXT: .LBB6_4:
; ARM-NEXT: ldr r2, .LCPI6_0
; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r5, r6
; ARM-NEXT: bne .LBB6_6
; ARM-NEXT: .LBB6_5:
; ARM-NEXT: ldr r5, [sp] @ 4-byte Reload
; ARM-NEXT: .LBB6_10:
; ARM-NEXT: adds r0, r1, #1
; ARM-NEXT: rsbs r7, r0, #0
; ARM-NEXT: adcs r7, r0
; ARM-NEXT: .LBB6_6:
; ARM-NEXT: adds r2, r1, #1
; ARM-NEXT: rsbs r7, r2, #0
; ARM-NEXT: adcs r7, r2
; ARM-NEXT: cmp r3, #0
; ARM-NEXT: mov r3, r0
; ARM-NEXT: bge .LBB6_8
; ARM-NEXT: @ %bb.7:
; ARM-NEXT: mov r3, r4
; ARM-NEXT: .LBB6_8:
; ARM-NEXT: ands r7, r3
; ARM-NEXT: cmp r1, r6
; ARM-NEXT: mov r3, r2
; ARM-NEXT: blt .LBB6_12
; ARM-NEXT: @ %bb.11:
; ARM-NEXT: mov r3, r0
; ARM-NEXT: blt .LBB6_10
; ARM-NEXT: @ %bb.9:
; ARM-NEXT: mov r3, r4
; ARM-NEXT: .LBB6_12:
; ARM-NEXT: .LBB6_10:
; ARM-NEXT: orrs r3, r7
; ARM-NEXT: lsls r1, r2, #31
; ARM-NEXT: lsls r1, r0, #31
; ARM-NEXT: cmp r3, #0
; ARM-NEXT: bne .LBB6_12
; ARM-NEXT: @ %bb.11:
; ARM-NEXT: mov r4, r5
; ARM-NEXT: .LBB6_12:
; ARM-NEXT: bne .LBB6_14
; ARM-NEXT: @ %bb.13:
; ARM-NEXT: mov r4, r5
; ARM-NEXT: .LBB6_14:
; ARM-NEXT: bne .LBB6_16
; ARM-NEXT: @ %bb.15:
; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARM-NEXT: .LBB6_16:
; ARM-NEXT: .LBB6_14:
; ARM-NEXT: mov r0, r4
; ARM-NEXT: add sp, #20
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
; ARM-NEXT: .p2align 2
; ARM-NEXT: @ %bb.17:
; ARM-NEXT: @ %bb.15:
; ARM-NEXT: .LCPI6_0:
; ARM-NEXT: .long 2147483647 @ 0x7fffffff
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 32)

View File

@ -34,45 +34,45 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sadd_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vmov r4, r5, d1
; CHECK-NEXT: adds.w r12, r2, r0
; CHECK-NEXT: vmov r0, r4, d1
; CHECK-NEXT: adc.w lr, r3, r1
; CHECK-NEXT: adc.w r0, r3, r1
; CHECK-NEXT: subs.w r2, r12, r2
; CHECK-NEXT: sbcs.w r2, lr, r3
; CHECK-NEXT: sbcs.w r2, r0, r3
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: cset r2, lt
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it mi
; CHECK-NEXT: eormi r2, r2, #1
; CHECK-NEXT: rsbs r1, r2, #0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: bfi r2, r1, #0, #8
; CHECK-NEXT: vmov r1, r3, d3
; CHECK-NEXT: adds r1, r1, r0
; CHECK-NEXT: adc.w r5, r4, r3
; CHECK-NEXT: subs r0, r1, r0
; CHECK-NEXT: sbcs.w r0, r5, r4
; CHECK-NEXT: vmov q0[2], q0[0], r12, r1
; CHECK-NEXT: cset r0, lt
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it mi
; CHECK-NEXT: eormi r0, r0, #1
; CHECK-NEXT: asr.w r1, lr, #31
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: vmov q0[3], q0[1], lr, r5
; CHECK-NEXT: bfi r2, r0, #8, #8
; CHECK-NEXT: asrs r0, r5, #31
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: cmp.w r3, r1, lsr #31
; CHECK-NEXT: it ne
; CHECK-NEXT: eorne r2, r2, #1
; CHECK-NEXT: rsb.w lr, r2, #0
; CHECK-NEXT: vmov r2, r1, d3
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adc.w r6, r5, r1
; CHECK-NEXT: subs r4, r2, r4
; CHECK-NEXT: sbcs.w r4, r6, r5
; CHECK-NEXT: vmov q0[2], q0[0], r12, r2
; CHECK-NEXT: cset r4, lt
; CHECK-NEXT: cmp.w r3, r1, lsr #31
; CHECK-NEXT: it ne
; CHECK-NEXT: eorne r4, r4, #1
; CHECK-NEXT: bfi r3, lr, #0, #8
; CHECK-NEXT: rsbs r1, r4, #0
; CHECK-NEXT: vmov q0[3], q0[1], r0, r6
; CHECK-NEXT: bfi r3, r1, #8, #8
; CHECK-NEXT: asrs r1, r6, #31
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmsr p0, r3
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
; CHECK-NEXT: adr r0, .LCPI3_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: veor q1, q1, q2
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI3_0: