
GCC, correctly, does not vectorize in this case. Because there is no direct instruction to convert a wider fixed-point value to a narrower floating-point precision, the conversion inadvertently introduces rounding, which leads to subtle differences across ISAs. See https://godbolt.org/z/ssEchMWrE. Co-authored-by: @echristo
211 lines
6.4 KiB
LLVM
211 lines
6.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Loads a <2 x i64> from %addr, converts it to <2 x float> with sitofp and
; stores the result to %addrfloat.
; The assertions below expect each 64-bit lane to be moved into a
; general-purpose register (mov.d / fmov), converted with the scalar
; scvtf (x -> s) form, and the two results recombined with a lane insert
; before a single 64-bit store.
; NOTE(review): presumably scalarized because no single vector instruction
; converts i64 lanes directly to f32 lanes -- confirm against the AArch64 ISA.
define void @autogen_SD19655(ptr %addr, ptr %addrfloat) {
; CHECK-LABEL: autogen_SD19655:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: scvtf s1, x9
; CHECK-NEXT: scvtf s0, x8
; CHECK-NEXT: mov.s v1[1], v0[0]
; CHECK-NEXT: str d1, [x1]
; CHECK-NEXT: ret
  %T = load <2 x i64>, ptr %addr
  %F = sitofp <2 x i64> %T to <2 x float>
  store <2 x float> %F, ptr %addrfloat
  ret void
}
; Signed conversion of <2 x i32> to <2 x double> (sitofp).
; The assertions expect the 32-bit lanes to be widened to 64 bits with
; sshll #0 and then converted with the vector scvtf.2d.
define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i32_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll.2d v0, v0, #0
; CHECK-NEXT: scvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i32> %v to <2 x double>
  ret <2 x double> %conv
}
; Unsigned conversion of <2 x i32> to <2 x double> (uitofp).
; The assertions expect the 32-bit lanes to be widened to 64 bits with
; ushll #0 and then converted with the vector ucvtf.2d.
define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i32_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i32> %v to <2 x double>
  ret <2 x double> %conv
}
; Signed conversion of <2 x i16> to <2 x double> (sitofp).
; The assertions expect the low 16 bits of each 32-bit lane to be
; sign-extended in place (shl #16 followed by sshr #16), the lanes widened
; to 64 bits with sshll #0, and the result converted with scvtf.2d.
define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i16_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #16
; CHECK-NEXT: sshr.2s v0, v0, #16
; CHECK-NEXT: sshll.2d v0, v0, #0
; CHECK-NEXT: scvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i16> %v to <2 x double>
  ret <2 x double> %conv
}
; Unsigned conversion of <2 x i16> to <2 x double> (uitofp).
; The assertions expect the upper 16 bits of each 32-bit lane to be cleared
; with an AND against the 0x0000ffff-per-lane mask, the lanes widened to
; 64 bits with ushll #0, and the result converted with ucvtf.2d.
define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i16_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i16> %v to <2 x double>
  ret <2 x double> %conv
}
; Signed conversion of <2 x i8> to <2 x double> (sitofp).
; The assertions expect the low 8 bits of each 32-bit lane to be
; sign-extended in place (shl #24 followed by sshr #24), the lanes widened
; to 64 bits with sshll #0, and the result converted with scvtf.2d.
define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i8_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshll.2d v0, v0, #0
; CHECK-NEXT: scvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i8> %v to <2 x double>
  ret <2 x double> %conv
}
; Unsigned conversion of <2 x i8> to <2 x double> (uitofp).
; The assertions expect the upper 24 bits of each 32-bit lane to be cleared
; with an AND against the 0x000000ff-per-lane mask, the lanes widened to
; 64 bits with ushll #0, and the result converted with ucvtf.2d.
define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i8_to_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i8> %v to <2 x double>
  ret <2 x double> %conv
}
; Signed narrowing conversion of <2 x i64> to <2 x float> (sitofp).
; The assertions expect no vector convert here: each 64-bit lane is moved to
; a general-purpose register (mov.d / fmov), converted individually with the
; scalar scvtf (x -> s) form, and lane 1 is inserted back with mov.s.
; NOTE(review): presumably done per lane so each element is rounded exactly
; once, straight from i64 to f32 -- confirm the intended rationale.
define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i64_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: scvtf s0, x9
; CHECK-NEXT: scvtf s1, x8
; CHECK-NEXT: mov.s v0[1], v1[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i64> %v to <2 x float>
  ret <2 x float> %conv
}
; Unsigned narrowing conversion of <2 x i64> to <2 x float> (uitofp).
; The assertions expect no vector convert here: each 64-bit lane is moved to
; a general-purpose register (mov.d / fmov), converted individually with the
; scalar ucvtf (x -> s) form, and lane 1 is inserted back with mov.s.
; NOTE(review): presumably done per lane so each element is rounded exactly
; once, straight from i64 to f32 -- confirm the intended rationale.
define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i64_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: ucvtf s0, x9
; CHECK-NEXT: ucvtf s1, x8
; CHECK-NEXT: mov.s v0[1], v1[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i64> %v to <2 x float>
  ret <2 x float> %conv
}
; Signed conversion of <2 x i16> to <2 x float> (sitofp).
; The assertions expect the low 16 bits of each 32-bit lane to be
; sign-extended in place (shl #16 followed by sshr #16) and the result
; converted with scvtf.2s; no widening step is needed.
define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i16_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #16
; CHECK-NEXT: sshr.2s v0, v0, #16
; CHECK-NEXT: scvtf.2s v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i16> %v to <2 x float>
  ret <2 x float> %conv
}
; Unsigned conversion of <2 x i16> to <2 x float> (uitofp).
; The assertions expect the upper 16 bits of each 32-bit lane to be cleared
; with an AND against the 0x0000ffff-per-lane mask and the result converted
; with ucvtf.2s.
define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i16_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ucvtf.2s v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i16> %v to <2 x float>
  ret <2 x float> %conv
}
; Signed conversion of <2 x i8> to <2 x float> (sitofp).
; The assertions expect the low 8 bits of each 32-bit lane to be
; sign-extended in place (shl #24 followed by sshr #24) and the result
; converted with scvtf.2s.
define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i8_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: scvtf.2s v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <2 x i8> %v to <2 x float>
  ret <2 x float> %conv
}
; Unsigned conversion of <2 x i8> to <2 x float> (uitofp).
; The assertions expect the upper 24 bits of each 32-bit lane to be cleared
; with an AND against the 0x000000ff-per-lane mask and the result converted
; with ucvtf.2s.
define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i8_to_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ucvtf.2s v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <2 x i8> %v to <2 x float>
  ret <2 x float> %conv
}
; Signed conversion of <4 x i16> to <4 x float> (sitofp).
; The assertions expect the 16-bit lanes to be widened to 32 bits with
; sshll #0 and then converted with the vector scvtf.4s.
define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v4i16_to_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: scvtf.4s v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <4 x i16> %v to <4 x float>
  ret <4 x float> %conv
}
; Unsigned conversion of <4 x i16> to <4 x float> (uitofp).
; The assertions expect the 16-bit lanes to be widened to 32 bits with
; ushll #0 and then converted with the vector ucvtf.4s.
define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v4i16_to_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ucvtf.4s v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <4 x i16> %v to <4 x float>
  ret <4 x float> %conv
}
; Signed conversion of <4 x i8> to <4 x float> (sitofp).
; The assertions expect the low 8 bits of each 16-bit lane to be
; sign-extended in place (shl #8 followed by sshr #8), the lanes widened to
; 32 bits with sshll #0, and the result converted with scvtf.4s.
define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v4i8_to_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: scvtf.4s v0, v0
; CHECK-NEXT: ret
  %conv = sitofp <4 x i8> %v to <4 x float>
  ret <4 x float> %conv
}
; Unsigned conversion of <4 x i8> to <4 x float> (uitofp).
; The assertions expect the upper byte of each 16-bit lane to be cleared
; with bic (#255, lsl #8), the lanes widened to 32 bits with ushll #0, and
; the result converted with ucvtf.4s.
define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v4i8_to_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ucvtf.4s v0, v0
; CHECK-NEXT: ret
  %conv = uitofp <4 x i8> %v to <4 x float>
  ret <4 x float> %conv
}