
This DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian.
53 lines
1.7 KiB
LLVM
53 lines
1.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=LE
|
|
; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=BE
|
|
|
|
define i32 @test(i64 %arg1) {
|
|
; LE-LABEL: test:
|
|
; LE: @ %bb.0: @ %entry
|
|
; LE-NEXT: subs r0, r0, #1
|
|
; LE-NEXT: mov r2, #0
|
|
; LE-NEXT: sbcs r0, r1, #0
|
|
; LE-NEXT: vldr s0, .LCPI0_0
|
|
; LE-NEXT: movwhs r2, #1
|
|
; LE-NEXT: cmp r2, #0
|
|
; LE-NEXT: mvnne r2, #0
|
|
; LE-NEXT: vmov s1, r2
|
|
; LE-NEXT: vmovn.i32 d16, q0
|
|
; LE-NEXT: vmovn.i16 d16, q8
|
|
; LE-NEXT: vmov.u8 r0, d16[0]
|
|
; LE-NEXT: and r0, r0, #1
|
|
; LE-NEXT: bx lr
|
|
; LE-NEXT: .p2align 2
|
|
; LE-NEXT: @ %bb.1:
|
|
; LE-NEXT: .LCPI0_0:
|
|
; LE-NEXT: .long 0xffffffff @ float NaN
|
|
;
|
|
; BE-LABEL: test:
|
|
; BE: @ %bb.0: @ %entry
|
|
; BE-NEXT: subs r1, r1, #1
|
|
; BE-NEXT: mov r2, #0
|
|
; BE-NEXT: sbcs r0, r0, #0
|
|
; BE-NEXT: vldr s0, .LCPI0_0
|
|
; BE-NEXT: movwhs r2, #1
|
|
; BE-NEXT: cmp r2, #0
|
|
; BE-NEXT: mvnne r2, #0
|
|
; BE-NEXT: vmov s1, r2
|
|
; BE-NEXT: vmovn.i32 d16, q0
|
|
; BE-NEXT: vmovn.i16 d16, q8
|
|
; BE-NEXT: vmov.u8 r0, d16[0]
|
|
; BE-NEXT: and r0, r0, #1
|
|
; BE-NEXT: bx lr
|
|
; BE-NEXT: .p2align 2
|
|
; BE-NEXT: @ %bb.1:
|
|
; BE-NEXT: .LCPI0_0:
|
|
; BE-NEXT: .long 0xffffffff @ float NaN
|
|
entry:
|
|
%insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
|
|
%splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
|
|
%cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
|
|
%first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
|
|
%ext = zext i1 %first_cmp to i32
|
|
ret i32 %ext
|
|
}
|