
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were discovered with the bootstrap builds, which seem like they were caused by this commit. I'm reverting to investigate.
263 lines
8.9 KiB
LLVM
263 lines
8.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16 | FileCheck %s

; Check that building a vector from floats doesn't insert an unnecessary
; copy for lane zero: the expected output contains lane moves for lanes 1-3
; only, with %a used in place as lane 0 of v0.
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
; CHECK-LABEL: foo:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    mov v0.s[3], v3.s[0]
; CHECK-NEXT:    ret
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float %b, i32 1
  %3 = insertelement <4 x float> %2, float %c, i32 2
  %4 = insertelement <4 x float> %3, float %d, i32 3
  ret <4 x float> %4
}

; Add followed by multiply where only lane 0 of each constant operand is
; defined (the remaining lanes are undef). 0x8000 * 0xae80 is 0 modulo 2^16,
; which is consistent with the expected output being a single multiply by
; 0xae80 (-20864 as an unsigned 16-bit value) materialized in lane 0.
; NOTE(review): attribute group #1 has no definition in the visible part of
; this file - confirm that is intentional.
define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
; CHECK-LABEL: build_all_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #44672 // =0xae80
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
  %c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
  ret <8 x i16> %c
}

; The DAG combiner performs the following fold:
;   fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
;   -> (BUILD_VECTOR A, B, ..., C, D, ...)
; This case checks that no assertion fails when A,B and C,D have different
; types.
define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
; CHECK-LABEL: concat_2_build_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
  %vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
  %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}

; The lowering of a widened f16 BUILD_VECTOR tries to optimize it by building
; an equivalent integer vector and BITCAST-ing that. This case checks that
; normalizing the vector generates a valid result. The choice of the
; constant prevents earlier passes from replacing the BUILD_VECTOR.
; The expected output stores both 0x33ee halves as one 32-bit word.
define void @widen_f16_build_vector(ptr %addr) {
; CHECK-LABEL: widen_f16_build_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #13294 // =0x33ee
; CHECK-NEXT:    movk w8, #13294, lsl #16
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ret
  store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
  ret void
}

; Check that a single-element vector constant is constructed with a mov:
; the <1 x i64> constant 1 is expected to be built in a general-purpose
; register and transferred to d1 with fmov before the scalar add.
define <1 x i64> @single_element_vector_i64(<1 x i64> %arg) {
; CHECK-LABEL: single_element_vector_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    ret
entry:
  %add = add <1 x i64> %arg, <i64 1>
  ret <1 x i64> %add
}

; Floating-point counterpart of the single-element case: the <1 x double>
; constant 1.0 is expected to be materialized with an fmov immediate and
; added with a scalar fadd.
define <1 x double> @single_element_vector_double(<1 x double> %arg) {
; CHECK-LABEL: single_element_vector_double:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d1, #1.00000000
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
entry:
  %add = fadd <1 x double> %arg, <double 1.0>
  ret <1 x double> %add
}

; Make sure BUILD_VECTOR does not get stuck in a loop trying to convert a
; single element FP vector constant from a scalar to vector.
; The expected output implements the select as a bitwise AND of the 1.0 bit
; pattern (0x3ff0000000000000) with an all-ones/all-zeros mask derived from
; %cmp.
define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) {
; CHECK-LABEL: convert_single_fp_vector_constant:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT:    csetm x9, ne
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %sel = select i1 %cmp, <1 x double> <double 1.000000e+00>, <1 x double> zeroinitializer
  ret <1 x double> %sel
}

; All-zero (+0.0) and all-negative-zero (-0.0) constant tests.

; +0.0 splat as a <2 x double> fadd operand: expected to be materialized
; with a single all-zero movi.
define <2 x double> @poszero_v2f64(<2 x double> %a) {
; CHECK-LABEL: poszero_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %b = fadd <2 x double> %a, <double 0.0, double 0.0>
  ret <2 x double> %b
}

; -0.0 splat as a <2 x double> fmul operand: expected to be built as an
; all-zero movi followed by a vector fneg that sets the sign bits.
define <2 x double> @negzero_v2f64(<2 x double> %a) {
; CHECK-LABEL: negzero_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    fneg v1.2d, v1.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %b = fmul <2 x double> %a, <double -0.0, double -0.0>
  ret <2 x double> %b
}

; +0.0 as a <1 x double> fadd operand: expected to be materialized with a
; 64-bit all-zero movi and used by a scalar fadd.
define <1 x double> @poszero_v1f64(<1 x double> %a) {
; CHECK-LABEL: poszero_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    ret
  %b = fadd <1 x double> %a, <double 0.0>
  ret <1 x double> %b
}

; -0.0 as a <1 x double> fmul operand: expected to be built in a
; general-purpose register as the sign-bit-only pattern 0x8000000000000000
; and transferred to d1 with fmov.
define <1 x double> @negzero_v1f64(<1 x double> %a) {
; CHECK-LABEL: negzero_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %b = fmul <1 x double> %a, <double -0.0>
  ret <1 x double> %b
}

; +0.0 splat as a <4 x float> fadd operand: expected to be materialized
; with a single all-zero movi.
define <4 x float> @poszero_v4f32(<4 x float> %a) {
; CHECK-LABEL: poszero_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %b = fadd <4 x float> %a, <float 0.0, float 0.0, float 0.0, float 0.0>
  ret <4 x float> %b
}

; -0.0 splat as a <4 x float> fmul operand: expected to be materialized
; with one movi that sets only the sign bit of each 32-bit lane
; (#128, lsl #24).
define <4 x float> @negzero_v4f32(<4 x float> %a) {
; CHECK-LABEL: negzero_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #128, lsl #24
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %b = fmul <4 x float> %a, <float -0.0, float -0.0, float -0.0, float -0.0>
  ret <4 x float> %b
}

; +0.0 splat as a <2 x float> fadd operand: expected to be materialized
; with a 64-bit all-zero movi.
define <2 x float> @poszero_v2f32(<2 x float> %a) {
; CHECK-LABEL: poszero_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %b = fadd <2 x float> %a, <float 0.0, float 0.0>
  ret <2 x float> %b
}

; -0.0 splat as a <2 x float> fmul operand: expected to be materialized
; with one movi that sets only the sign bit of each 32-bit lane
; (#128, lsl #24).
define <2 x float> @negzero_v2f32(<2 x float> %a) {
; CHECK-LABEL: negzero_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2s, #128, lsl #24
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %b = fmul <2 x float> %a, <float -0.0, float -0.0>
  ret <2 x float> %b
}

; +0.0 splat as an <8 x half> fadd operand: expected to be materialized
; with a single all-zero movi.
define <8 x half> @poszero_v8f16(<8 x half> %a) {
; CHECK-LABEL: poszero_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %b = fadd <8 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0>
  ret <8 x half> %b
}

; -0.0 splat as an <8 x half> fmul operand: expected to be materialized
; with one movi that sets only the sign bit of each 16-bit lane
; (#128, lsl #8).
define <8 x half> @negzero_v8f16(<8 x half> %a) {
; CHECK-LABEL: negzero_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #128, lsl #8
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %b = fmul <8 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>
  ret <8 x half> %b
}

; +0.0 splat as a <4 x half> fadd operand: expected to be materialized
; with a 64-bit all-zero movi.
define <4 x half> @poszero_v4f16(<4 x half> %a) {
; CHECK-LABEL: poszero_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    fadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %b = fadd <4 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0>
  ret <4 x half> %b
}

; -0.0 splat as a <4 x half> fmul operand: expected to be materialized
; with one movi that sets only the sign bit of each 16-bit lane
; (#128, lsl #8).
define <4 x half> @negzero_v4f16(<4 x half> %a) {
; CHECK-LABEL: negzero_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4h, #128, lsl #8
; CHECK-NEXT:    fmul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %b = fmul <4 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0>
  ret <4 x half> %b
}

; Returns a +0.0 <8 x bfloat> splat directly (%a is unused): expected to be
; a single all-zero movi into the return register.
define <8 x bfloat> @poszero_v8bf16(<8 x bfloat> %a) {
; CHECK-LABEL: poszero_v8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  ret <8 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
}

; Returns a -0.0 <8 x bfloat> splat directly (%a is unused): expected to be
; one movi that sets only the sign bit of each 16-bit lane (#128, lsl #8).
define <8 x bfloat> @negzero_v8bf16(<8 x bfloat> %a) {
; CHECK-LABEL: negzero_v8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  ret <8 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
}

; Returns a +0.0 <4 x bfloat> splat directly (%a is unused): expected to be
; a 64-bit all-zero movi into the return register.
define <4 x bfloat> @poszero_v4bf16(<4 x bfloat> %a) {
; CHECK-LABEL: poszero_v4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    ret
  ret <4 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
}

; Returns a -0.0 <4 x bfloat> splat directly (%a is unused): expected to be
; one movi that sets only the sign bit of each 16-bit lane (#128, lsl #8).
define <4 x bfloat> @negzero_v4bf16(<4 x bfloat> %a) {
; CHECK-LABEL: negzero_v4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.4h, #128, lsl #8
; CHECK-NEXT:    ret
  ret <4 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
}