
; Re-landing #116970 after fixing a miscompilation error. The original change
; made it possible for CMPZ to have multiple uses, and
; `ARMDAGToDAGISel::SelectCMPZ` was not prepared for this.
; Pull Request: https://github.com/llvm/llvm-project/pull/118887
;
; Original commit message:
; Following #116547 and #116676, this PR changes the type of results and
; operands of some nodes to accept / return a normal type instead of Glue.
; Unfortunately, changing the result type of one node requires changing the
; operand types of all potential consumer nodes, which in turn requires
; changing the result types of all other possible producer nodes. So this is
; a bulk change.
;
; (Source viewer metadata: 485 lines, 16 KiB, LLVM.)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Make sure that the ARM backend with NEON handles vselect.
; vmax_v4i32 - signed "greater than" compare feeding a select over <4 x i32>.
; The llc expectations below require the icmp/select pair to be emitted as a
; single vmax.s32, with the result stored through %m.
define void @vmax_v4i32(ptr %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vmax_v4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r1, sp, #8
; CHECK-NEXT: vldr d17, [sp]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vmov d16, r2, r3
; CHECK-NEXT: vmax.s32 q8, q8, q9
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, ptr %m
  ret void
}
; func_blend10 - signed "less than" compare plus select over <16 x i16>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. The llc expectations
; require two vmin.s16 operations (one per 128-bit half); the cost-model
; expectations require a free icmp and a select of cost 4.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
define void @func_blend10(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend10:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.16 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vmin.s16 q8, q9, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT: vmin.s16 q9, q9, q10
; CHECK-NEXT: vst1.16 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT: mov pc, lr
; COST: func_blend10
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select

  %v0 = load %T0_10, ptr %loadaddr
  %v1 = load %T0_10, ptr %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, ptr %storeaddr
  ret void
}
; func_blend14 - signed "less than" compare plus select over <8 x i32>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. The llc expectations
; require two vmin.s32 operations (one per 128-bit half); the cost-model
; expectations require a free icmp and a select of cost 4.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
define void @func_blend14(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend14:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.32 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vmin.s32 q8, q9, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT: vmin.s32 q9, q9, q10
; CHECK-NEXT: vst1.32 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT: mov pc, lr
; COST: func_blend14
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %v0 = load %T0_14, ptr %loadaddr
  %v1 = load %T0_14, ptr %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, ptr %storeaddr
  ret void
}
; func_blend15 - signed "less than" compare plus select over <16 x i32>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. The llc expectations
; require four vmin.s32 operations (one per 128-bit quarter); the cost-model
; expectations require a free icmp and a select of cost 8.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend15:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.32 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vmin.s32 q8, q9, q8
; CHECK-NEXT: vld1.32 {d20, d21}, [r1:128]!
; CHECK-NEXT: vld1.32 {d22, d23}, [r0:128]!
; CHECK-NEXT: vmin.s32 q10, q11, q10
; CHECK-NEXT: vld1.32 {d24, d25}, [r1:128]!
; CHECK-NEXT: vld1.32 {d26, d27}, [r0:128]!
; CHECK-NEXT: vmin.s32 q12, q13, q12
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]
; CHECK-NEXT: vmin.s32 q9, q11, q9
; CHECK-NEXT: vst1.32 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.32 {d20, d21}, [r3:128]!
; CHECK-NEXT: vst1.32 {d24, d25}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT: mov pc, lr
; COST: func_blend15
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select

  %v0 = load %T0_15, ptr %loadaddr
  %v1 = load %T0_15, ptr %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, ptr %storeaddr
  ret void
}
; The cost model was adjusted for the selects that follow. Whenever the code
; lowering for them improves, the expected costs must be updated to match.
; func_blend18 - signed "less than" compare plus select over <4 x i64>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. The llc expectations show
; each 64-bit lane being compared in core registers (subs/sbcs), turned into
; an all-ones/zero mask (movlt/mvnne), broadcast with vdup.32 and applied with
; vbit. The cost-model expectations require a free icmp and a select of
; cost 21.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT: vmov r4, r6, d16
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vmov lr, r12, d18
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov r2, r1, d20
; CHECK-NEXT: subs r2, r2, lr
; CHECK-NEXT: vmov r2, r5, d22
; CHECK-NEXT: sbcs r1, r1, r12
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: sbcs r6, r5, r6
; CHECK-NEXT: vmov r2, r12, d17
; CHECK-NEXT: vmov r5, r4, d23
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r4, r12
; CHECK-NEXT: vmov lr, r12, d19
; CHECK-NEXT: vmov r4, r5, d21
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d25, r2
; CHECK-NEXT: vdup.32 d24, r6
; CHECK-NEXT: vbit q8, q11, q12
; CHECK-NEXT: subs r4, r4, lr
; CHECK-NEXT: sbcs r5, r5, r12
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d27, r0
; CHECK-NEXT: vdup.32 d26, r1
; CHECK-NEXT: vbit q9, q10, q13
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT: pop {r4, r5, r6, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
  %v0 = load %T0_18, ptr %loadaddr
  %v1 = load %T0_18, ptr %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, ptr %storeaddr
  ret void
}
; func_blend19 - signed "less than" compare plus select over <8 x i64>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. As in the expectations
; for the narrower i64 case, each 64-bit lane is compared in core registers
; (subs/sbcs), converted to an all-ones/zero mask, broadcast with vdup.32 and
; applied with vbit. The cost-model expectations require a free icmp and a
; select of cost 54.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend19:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vld1.64 {d28, d29}, [r1:128]!
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]!
; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]!
; CHECK-NEXT: vld1.64 {d26, d27}, [r0:128]!
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT: vmov r0, r12, d16
; CHECK-NEXT: vmov r1, r2, d18
; CHECK-NEXT: subs r0, r1, r0
; CHECK-NEXT: vmov r1, r4, d25
; CHECK-NEXT: sbcs r0, r2, r12
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: vmov r2, r0, d21
; CHECK-NEXT: movlt r12, #1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: mvnne r12, #0
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: sbcs r0, r4, r0
; CHECK-NEXT: vmov r2, r4, d24
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d1, r0
; CHECK-NEXT: vmov r0, r1, d20
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r4, r1
; CHECK-NEXT: vmov r2, r4, d26
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d0, r0
; CHECK-NEXT: vmov r0, r1, d22
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: sbcs r0, r4, r1
; CHECK-NEXT: vmov r4, r5, d31
; CHECK-NEXT: vmov r0, r1, d29
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: vmov r4, r5, d30
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d3, r0
; CHECK-NEXT: vmov r0, r1, d28
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: vmov r4, r5, d27
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d2, r0
; CHECK-NEXT: vmov r0, r1, d23
; CHECK-NEXT: vbit q14, q15, q1
; CHECK-NEXT: vbit q10, q12, q0
; CHECK-NEXT: subs r0, r4, r0
; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: vmov r1, r4, d17
; CHECK-NEXT: vmov r5, r6, d19
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d31, r0
; CHECK-NEXT: vdup.32 d30, r2
; CHECK-NEXT: vbit q11, q13, q15
; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]!
; CHECK-NEXT: subs r1, r5, r1
; CHECK-NEXT: sbcs r1, r6, r4
; CHECK-NEXT: movlt lr, #1
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: mvnne lr, #0
; CHECK-NEXT: vdup.32 d3, lr
; CHECK-NEXT: vdup.32 d2, r12
; CHECK-NEXT: vbit q8, q9, q1
; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]
; CHECK-NEXT: pop {r4, r5, r6, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend19
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
  %v0 = load %T0_19, ptr %loadaddr
  %v1 = load %T0_19, ptr %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, ptr %storeaddr
  ret void
}
; func_blend20 - signed "less than" compare plus select over <16 x i64>.
; Both operands are loaded from memory and the selected vector is stored to
; %storeaddr; %blend is not referenced by the body. The llc expectations show
; each 64-bit lane being compared in core registers (subs/sbcs), converted to
; an all-ones/zero mask, broadcast with vdup.32 and applied with bitwise
; selects (vbit/vbif/vbsl); d8/d9 are saved and restored around the body.
; The cost-model expectations require a free icmp and a select of cost 108.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: add r8, r1, #64
; CHECK-NEXT: add lr, r0, #64
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: vmov r6, r7, d25
; CHECK-NEXT: vld1.64 {d18, d19}, [lr:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]!
; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]!
; CHECK-NEXT: subs r4, r6, r4
; CHECK-NEXT: sbcs r4, r7, r5
; CHECK-NEXT: vmov r5, r6, d16
; CHECK-NEXT: vmov r7, r2, d24
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d27, r4
; CHECK-NEXT: subs r5, r7, r5
; CHECK-NEXT: sbcs r2, r2, r6
; CHECK-NEXT: vmov r5, r6, d1
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d26, r2
; CHECK-NEXT: vmov r2, r4, d23
; CHECK-NEXT: vbit q8, q12, q13
; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]!
; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]!
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r6, r4
; CHECK-NEXT: vmov r4, r5, d22
; CHECK-NEXT: vmov r6, r7, d0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d3, r2
; CHECK-NEXT: subs r4, r6, r4
; CHECK-NEXT: sbcs r4, r7, r5
; CHECK-NEXT: vmov r2, r5, d27
; CHECK-NEXT: vmov r6, r7, d25
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d2, r4
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r7, r5
; CHECK-NEXT: vmov r6, r7, d24
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d5, r2
; CHECK-NEXT: vmov r2, r5, d26
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r7, r5
; CHECK-NEXT: vmov r6, r7, d19
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d4, r2
; CHECK-NEXT: vmov r2, r5, d21
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r7, r5
; CHECK-NEXT: vmov r6, r7, d18
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d31, r2
; CHECK-NEXT: vmov r2, r5, d20
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r7, r5
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d30, r2
; CHECK-NEXT: vbif q9, q10, q15
; CHECK-NEXT: vld1.64 {d30, d31}, [r8:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]
; CHECK-NEXT: vbit q13, q12, q2
; CHECK-NEXT: vld1.64 {d24, d25}, [lr:128]
; CHECK-NEXT: vmov r2, r7, d21
; CHECK-NEXT: vbit q11, q0, q1
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: vmov r6, r5, d25
; CHECK-NEXT: vld1.64 {d4, d5}, [r1:128]!
; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]!
; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128]
; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128]
; CHECK-NEXT: subs r1, r6, r2
; CHECK-NEXT: vmov r0, r6, d2
; CHECK-NEXT: sbcs r1, r5, r7
; CHECK-NEXT: vmov r2, r7, d0
; CHECK-NEXT: movlt lr, #1
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: mvnne lr, #0
; CHECK-NEXT: subs r0, r0, r2
; CHECK-NEXT: sbcs r0, r6, r7
; CHECK-NEXT: vmov r2, r7, d30
; CHECK-NEXT: vmov r6, r5, d28
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r5, r7
; CHECK-NEXT: vmov r7, r6, d31
; CHECK-NEXT: vmov r5, r4, d29
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: subs r7, r5, r7
; CHECK-NEXT: vmov r5, r1, d7
; CHECK-NEXT: sbcs r7, r4, r6
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: vmov r7, r6, d5
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: subs r5, r5, r7
; CHECK-NEXT: sbcs r1, r1, r6
; CHECK-NEXT: vmov r6, r7, d6
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vdup.32 d9, r1
; CHECK-NEXT: vmov r1, r5, d4
; CHECK-NEXT: subs r1, r6, r1
; CHECK-NEXT: sbcs r1, r7, r5
; CHECK-NEXT: vmov r6, r7, d3
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vdup.32 d8, r1
; CHECK-NEXT: vmov r1, r5, d1
; CHECK-NEXT: vbit q2, q3, q4
; CHECK-NEXT: vdup.32 d9, r4
; CHECK-NEXT: vdup.32 d8, r2
; CHECK-NEXT: subs r1, r6, r1
; CHECK-NEXT: sbcs r1, r7, r5
; CHECK-NEXT: vmov r5, r6, d24
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vdup.32 d7, r1
; CHECK-NEXT: vmov r1, r4, d20
; CHECK-NEXT: vdup.32 d6, r0
; CHECK-NEXT: subs r1, r5, r1
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: sbcs r0, r6, r4
; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT: vorr q8, q4, q4
; CHECK-NEXT: movlt r12, #1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: vbsl q8, q14, q15
; CHECK-NEXT: vdup.32 d29, lr
; CHECK-NEXT: vorr q15, q3, q3
; CHECK-NEXT: mvnne r12, #0
; CHECK-NEXT: vdup.32 d28, r12
; CHECK-NEXT: add r0, r3, #64
; CHECK-NEXT: vbsl q15, q1, q0
; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]!
; CHECK-NEXT: vbit q10, q12, q14
; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT: vst1.64 {d4, d5}, [r1:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]!
; CHECK-NEXT: vst1.64 {d30, d31}, [r1:128]
; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
  %v0 = load %T0_20, ptr %loadaddr
  %v1 = load %T0_20, ptr %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, ptr %storeaddr
  ret void
}