Sergei Barannikov e0ed0333f0
Reland "[ARM] Stop gluing ALU nodes to branches / selects" (#118887)
Re-landing #116970 after fixing miscompilation error.

The original change made it possible for CMPZ to have multiple uses;
`ARMDAGToDAGISel::SelectCMPZ` was not prepared for this.

Pull Request: https://github.com/llvm/llvm-project/pull/118887


Original commit message:

Following #116547 and #116676, this PR changes the type of results and
operands of some nodes to accept / return a normal type instead of Glue.

Unfortunately, changing the result type of one node requires changing
the operand types of all potential consumer nodes, which in turn
requires changing the result types of all other possible producer nodes.
So this is a bulk change.
2024-12-07 10:14:36 +03:00

463 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=armv7-eabi -mattr=-fpregs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM
; RUN: llc -mtriple=armv7-eabi -mattr=+vfp2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-VFP
; RUN: llc -mtriple=thumbv7-apple-darwin -mattr=+neon,+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK-NEON
; f1: integer select on an equality test against an immediate.
; Returns 2 when %a.s equals 4, otherwise 3.
define i32 @f1(i32 %a.s) {
; CHECK-LABEL: f1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r1, #3
; CHECK-NEXT: cmp r0, #4
; CHECK-NEXT: movweq r1, #2
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f1:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r1, #3
; CHECK-NEON-NEXT: cmp r0, #4
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: moveq r1, #2
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bx lr
entry:
; The i1 from the compare is consumed only by the select that follows.
%tmp = icmp eq i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f2: integer select on a signed greater-than test against an immediate.
; Returns 2 when %a.s is signed-greater-than 4, otherwise 3.
define i32 @f2(i32 %a.s) {
; CHECK-LABEL: f2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r1, #3
; CHECK-NEXT: cmp r0, #4
; CHECK-NEXT: movwgt r1, #2
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f2:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r1, #3
; CHECK-NEON-NEXT: cmp r0, #4
; CHECK-NEON-NEXT: it gt
; CHECK-NEON-NEXT: movgt r1, #2
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bx lr
entry:
; Signed predicate (sgt); the expected output above uses the "gt" condition.
%tmp = icmp sgt i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f3: integer select on a signed less-than test between two arguments.
; Returns 2 when %a.s is signed-less-than %b.s, otherwise 3.
define i32 @f3(i32 %a.s, i32 %b.s) {
; CHECK-LABEL: f3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movwlt r2, #2
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f3:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r2, #3
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it lt
; CHECK-NEON-NEXT: movlt r2, #2
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bx lr
entry:
; Signed predicate (slt); the expected output above uses the "lt" condition.
%tmp = icmp slt i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f4: integer select on a signed less-or-equal test between two arguments.
; Returns 2 when %a.s is signed-less-or-equal to %b.s, otherwise 3.
define i32 @f4(i32 %a.s, i32 %b.s) {
; CHECK-LABEL: f4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movwle r2, #2
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f4:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r2, #3
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it le
; CHECK-NEON-NEXT: movle r2, #2
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bx lr
entry:
; Signed predicate (sle); the expected output above uses the "le" condition.
%tmp = icmp sle i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f5: integer select on an unsigned less-or-equal test between two arguments.
; Returns 2 when %a.u is unsigned-less-or-equal to %b.u, otherwise 3.
define i32 @f5(i32 %a.u, i32 %b.u) {
; CHECK-LABEL: f5:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movwls r2, #2
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f5:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r2, #3
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it ls
; CHECK-NEON-NEXT: movls r2, #2
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bx lr
entry:
; Unsigned predicate (ule); the expected output above uses the "ls" condition.
%tmp = icmp ule i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f6: integer select on an unsigned greater-than test between two arguments.
; Returns 2 when %a.u is unsigned-greater-than %b.u, otherwise 3.
define i32 @f6(i32 %a.u, i32 %b.u) {
; CHECK-LABEL: f6:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov r2, #3
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: movwhi r2, #2
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: f6:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: movs r2, #3
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it hi
; CHECK-NEON-NEXT: movhi r2, #2
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bx lr
entry:
; Unsigned predicate (ugt); the expected output above uses the "hi" condition.
%tmp = icmp ugt i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
; f7: double-precision select driven by a floating-point compare.
; Returns -1.0 when %a is ordered-less-than 1.234, otherwise returns %b.
; Without FP registers the expected output calls __aeabi_dcmplt; the VFP and
; NEON configurations expect vcmp.f64 plus a predicated vmov instead.
define double @f7(double %a, double %b) {
; CHECK-ARM-LABEL: f7:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: .save {r4, r5, r11, lr}
; CHECK-ARM-NEXT: push {r4, r5, r11, lr}
; CHECK-ARM-NEXT: mov r4, r3
; CHECK-ARM-NEXT: movw r3, #48758
; CHECK-ARM-NEXT: mov r5, r2
; CHECK-ARM-NEXT: movw r2, #14680
; CHECK-ARM-NEXT: movt r2, #51380
; CHECK-ARM-NEXT: movt r3, #16371
; CHECK-ARM-NEXT: bl __aeabi_dcmplt
; CHECK-ARM-NEXT: cmp r0, #0
; CHECK-ARM-NEXT: movwne r4, #0
; CHECK-ARM-NEXT: movwne r5, #0
; CHECK-ARM-NEXT: movtne r4, #49136
; CHECK-ARM-NEXT: mov r0, r5
; CHECK-ARM-NEXT: mov r1, r4
; CHECK-ARM-NEXT: pop {r4, r5, r11, pc}
;
; CHECK-VFP-LABEL: f7:
; CHECK-VFP: @ %bb.0:
; CHECK-VFP-NEXT: vldr d17, .LCPI6_0
; CHECK-VFP-NEXT: vmov d18, r0, r1
; CHECK-VFP-NEXT: vmov.f64 d16, #-1.000000e+00
; CHECK-VFP-NEXT: vcmp.f64 d18, d17
; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-VFP-NEXT: vmov d19, r2, r3
; CHECK-VFP-NEXT: vmovmi.f64 d19, d16
; CHECK-VFP-NEXT: vmov r0, r1, d19
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 3
; CHECK-VFP-NEXT: @ %bb.1:
; CHECK-VFP-NEXT: .LCPI6_0:
; CHECK-VFP-NEXT: .long 3367254360 @ double 1.234
; CHECK-VFP-NEXT: .long 1072938614
;
; CHECK-NEON-LABEL: f7:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: vldr d17, LCPI6_0
; CHECK-NEON-NEXT: vmov d18, r0, r1
; CHECK-NEON-NEXT: vmov d19, r2, r3
; CHECK-NEON-NEXT: vcmp.f64 d18, d17
; CHECK-NEON-NEXT: vmov.f64 d16, #-1.000000e+00
; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEON-NEXT: it mi
; CHECK-NEON-NEXT: vmovmi.f64 d19, d16
; CHECK-NEON-NEXT: vmov r0, r1, d19
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 3
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .data_region
; CHECK-NEON-NEXT: LCPI6_0:
; CHECK-NEON-NEXT: .long 3367254360 @ double 1.234
; CHECK-NEON-NEXT: .long 1072938614
; CHECK-NEON-NEXT: .end_data_region
; Ordered compare (olt): the result is false if either operand is NaN, in
; which case the select yields %b.
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
}
; <rdar://problem/7260094>
;
; We used to generate really horrible code for this function. The main cause was
; a lack of a custom lowering routine for an ISD::SELECT. This would result in
; two "it" blocks in the code: one for the "icmp" and another to move the index
; into the constant pool based on the value of the "icmp". If we have one "it"
; block generated, odds are good that we have close to the ideal code for this.
; f8: select between two float constants on an integer equality test.
; Returns approximately 1.234 when %a equals 1123 and approximately 2.382
; otherwise (decimal values as shown in the expected constant-pool entries).
define arm_apcscc float @f8(i32 %a) nounwind {
; CHECK-ARM-LABEL: f8:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: movw r1, #29905
; CHECK-ARM-NEXT: movw r2, #1123
; CHECK-ARM-NEXT: movt r1, #16408
; CHECK-ARM-NEXT: cmp r0, r2
; CHECK-ARM-NEXT: movweq r1, #62390
; CHECK-ARM-NEXT: movteq r1, #16285
; CHECK-ARM-NEXT: mov r0, r1
; CHECK-ARM-NEXT: bx lr
;
; CHECK-VFP-LABEL: f8:
; CHECK-VFP: @ %bb.0:
; CHECK-VFP-NEXT: movw r2, #1123
; CHECK-VFP-NEXT: adr r1, .LCPI7_0
; CHECK-VFP-NEXT: cmp r0, r2
; CHECK-VFP-NEXT: addeq r1, r1, #4
; CHECK-VFP-NEXT: ldr r0, [r1]
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
; CHECK-VFP-NEXT: .LCPI7_0:
; CHECK-VFP-NEXT: .long 0x401874d1 @ float 2.38212991
; CHECK-VFP-NEXT: .long 0x3f9df3b6 @ float 1.23399997
;
; CHECK-NEON-LABEL: f8:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: adr r1, LCPI7_0
; CHECK-NEON-NEXT: movw r2, #1123
; CHECK-NEON-NEXT: cmp r0, r2
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: addeq r1, #4
; CHECK-NEON-NEXT: ldr r0, [r1]
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .data_region
; CHECK-NEON-NEXT: LCPI7_0:
; CHECK-NEON-NEXT: .long 0x401874d1 @ float 2.38212991
; CHECK-NEON-NEXT: .long 0x3f9df3b6 @ float 1.23399997
; CHECK-NEON-NEXT: .end_data_region
; The float operands are written in LLVM's hexadecimal floating-point form.
; In the VFP/NEON expectations the two constants sit next to each other in the
; constant pool and the condition only adjusts the load address by 4 bytes.
%tmp = icmp eq i32 %a, 1123
%tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
ret float %tmp1
}
; <rdar://problem/9049552>
; Glue values can only have a single use, but the following test exposed a
; case where a SELECT was lowered with 2 uses of a comparison, causing the
; scheduler to assert.
; External callee used by f9; declared as variadic and returning ptr.
declare ptr @objc_msgSend(ptr, ptr, ...)
; f9: a single comparison result (%cmp) feeds two selects of different types
; (float and double) whose results are combined and passed to objc_msgSend.
; The function is marked optsize.
define void @f9() optsize {
; CHECK-LABEL: f9:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: movw r2, #0
; CHECK-NEXT: movw r3, #0
; CHECK-NEXT: mov r1, #1065353216
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movt r2, #16672
; CHECK-NEXT: movt r3, #32704
; CHECK-NEXT: strd r0, r1, [sp]
; CHECK-NEXT: bl objc_msgSend
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: pop {r11, pc}
;
; CHECK-NEON-LABEL: f9:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: str lr, [sp, #-4]!
; CHECK-NEON-NEXT: sub sp, #8
; CHECK-NEON-NEXT: movs r2, #0
; CHECK-NEON-NEXT: movs r3, #0
; CHECK-NEON-NEXT: mov.w r0, #1065353216
; CHECK-NEON-NEXT: movs r1, #0
; CHECK-NEON-NEXT: movt r2, #16672
; CHECK-NEON-NEXT: movt r3, #32704
; CHECK-NEON-NEXT: strd r1, r0, [sp]
; CHECK-NEON-NEXT: bl _objc_msgSend
; CHECK-NEON-NEXT: add sp, #8
; CHECK-NEON-NEXT: ldr lr, [sp], #4
; CHECK-NEON-NEXT: bx lr
entry:
; The compared pointer is undef; %cmp is then used by both selects below,
; giving the comparison two users.
%cmp = icmp eq ptr undef, inttoptr (i32 4 to ptr)
%conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00
%conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00
; Double path: add 11.0, then truncate to float.
%add = fadd double %conv195, 1.100000e+01
%conv196 = fptrunc double %add to float
; Float path: add the selected float to an undef operand.
%add201 = fadd float undef, %conv191
; Reinterpret both floats as i32 and pack them into a [2 x i32] aggregate.
%tmp484 = bitcast float %conv196 to i32
%tmp478 = bitcast float %add201 to i32
%tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0
%tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1
; NOTE(review): this call site is written with a void return type, while the
; declaration of @objc_msgSend returns ptr and is variadic.
call void @objc_msgSend(ptr undef, ptr undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
ret void
}
; f10: converts an integer equality result to float via zext + sitofp.
; The i1 is zero-extended to i32 (0 or 1) before the signed conversion, so the
; result is 1.0 when %a equals %b and 0.0 otherwise.
define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
; CHECK-ARM-LABEL: f10:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: mov r2, #0
; CHECK-ARM-NEXT: cmp r0, r1
; CHECK-ARM-NEXT: moveq r2, #1065353216
; CHECK-ARM-NEXT: mov r0, r2
; CHECK-ARM-NEXT: bx lr
;
; CHECK-VFP-LABEL: f10:
; CHECK-VFP: @ %bb.0:
; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-VFP-NEXT: vldr s2, .LCPI9_0
; CHECK-VFP-NEXT: cmp r0, r1
; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
; CHECK-VFP-NEXT: vmov r0, s2
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
; CHECK-VFP-NEXT: .LCPI9_0:
; CHECK-VFP-NEXT: .long 0x00000000 @ float 0
;
; CHECK-NEON-LABEL: f10:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: vldr s2, LCPI9_0
; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .data_region
; CHECK-NEON-NEXT: LCPI9_0:
; CHECK-NEON-NEXT: .long 0x00000000 @ float 0
; CHECK-NEON-NEXT: .end_data_region
; The expected output contains no int-to-float conversion instruction, only a
; conditional move between the constants 1.0 and 0.0.
%1 = icmp eq i32 %a, %b
%2 = zext i1 %1 to i32
%3 = sitofp i32 %2 to float
ret float %3
}
; f11: converts an integer equality result to float with a signed conversion
; applied directly to the i1. A true i1 is -1 when read as signed, so the
; result is -1.0 when %a equals %b and 0.0 otherwise.
define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
; CHECK-ARM-LABEL: f11:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: mov r2, #0
; CHECK-ARM-NEXT: cmp r0, r1
; CHECK-ARM-NEXT: movweq r2, #0
; CHECK-ARM-NEXT: movteq r2, #49024
; CHECK-ARM-NEXT: mov r0, r2
; CHECK-ARM-NEXT: bx lr
;
; CHECK-VFP-LABEL: f11:
; CHECK-VFP: @ %bb.0:
; CHECK-VFP-NEXT: vmov.f32 s0, #-1.000000e+00
; CHECK-VFP-NEXT: vldr s2, .LCPI10_0
; CHECK-VFP-NEXT: cmp r0, r1
; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
; CHECK-VFP-NEXT: vmov r0, s2
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
; CHECK-VFP-NEXT: .LCPI10_0:
; CHECK-VFP-NEXT: .long 0x00000000 @ float 0
;
; CHECK-NEON-LABEL: f11:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: vldr s2, LCPI10_0
; CHECK-NEON-NEXT: vmov.f32 s0, #-1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .data_region
; CHECK-NEON-NEXT: LCPI10_0:
; CHECK-NEON-NEXT: .long 0x00000000 @ float 0
; CHECK-NEON-NEXT: .end_data_region
; sitofp on i1: contrast with f10, which zero-extends first and yields +1.0.
%1 = icmp eq i32 %a, %b
%2 = sitofp i1 %1 to float
ret float %2
}
; f12: converts an integer equality result to float with an unsigned
; conversion applied directly to the i1. The result is 1.0 when %a equals %b
; and 0.0 otherwise.
define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
; CHECK-ARM-LABEL: f12:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: mov r2, #0
; CHECK-ARM-NEXT: cmp r0, r1
; CHECK-ARM-NEXT: moveq r2, #1065353216
; CHECK-ARM-NEXT: mov r0, r2
; CHECK-ARM-NEXT: bx lr
;
; CHECK-VFP-LABEL: f12:
; CHECK-VFP: @ %bb.0:
; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-VFP-NEXT: vldr s2, .LCPI11_0
; CHECK-VFP-NEXT: cmp r0, r1
; CHECK-VFP-NEXT: vmoveq.f32 s2, s0
; CHECK-VFP-NEXT: vmov r0, s2
; CHECK-VFP-NEXT: bx lr
; CHECK-VFP-NEXT: .p2align 2
; CHECK-VFP-NEXT: @ %bb.1:
; CHECK-VFP-NEXT: .LCPI11_0:
; CHECK-VFP-NEXT: .long 0x00000000 @ float 0
;
; CHECK-NEON-LABEL: f12:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: vldr s2, LCPI11_0
; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-NEON-NEXT: cmp r0, r1
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: vmoveq.f32 s2, s0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: bx lr
; CHECK-NEON-NEXT: .p2align 2
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .data_region
; CHECK-NEON-NEXT: LCPI11_0:
; CHECK-NEON-NEXT: .long 0x00000000 @ float 0
; CHECK-NEON-NEXT: .end_data_region
; uitofp on i1: a true bit converts to +1.0, unlike the sitofp form in f11.
%1 = icmp eq i32 %a, %b
%2 = uitofp i1 %1 to float
ret float %2
}
; test_overflow_recombine: returns only the overflow flag of a signed 32-bit
; multiply-with-overflow; the product itself (element 0) is never extracted.
define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
; CHECK-LABEL: test_overflow_recombine:
; CHECK: @ %bb.0:
; CHECK-NEXT: mul r2, r0, r1
; CHECK-NEXT: smmul r0, r0, r1
; CHECK-NEXT: subs r0, r0, r2, asr #31
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: test_overflow_recombine:
; CHECK-NEON: @ %bb.0:
; CHECK-NEON-NEXT: mul r2, r0, r1
; CHECK-NEON-NEXT: smmul r0, r0, r1
; CHECK-NEON-NEXT: subs.w r0, r0, r2, asr #31
; CHECK-NEON-NEXT: it ne
; CHECK-NEON-NEXT: movne r0, #1
; CHECK-NEON-NEXT: bx lr
; The intrinsic returns a { product, overflow } pair; index 1 is the i1 flag.
%prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %in1, i32 %in2)
%overflow = extractvalue { i32, i1 } %prod, 1
ret i1 %overflow
}
; Signed multiply-with-overflow intrinsic used by test_overflow_recombine.
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)