From 2e3f2523e624a4a922c386f6f1264c19f25a2e26 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 29 Sep 2025 16:54:49 +0100 Subject: [PATCH] [AArch64] Add global isel coverage for fp16 tests and strict-fp. NFC --- .../CodeGen/AArch64/fp16-v4-instructions.ll | 732 ++++++--- .../CodeGen/AArch64/fp16-v8-instructions.ll | 1448 ++++++++++++----- llvm/test/CodeGen/AArch64/strict-fp-opt.ll | 148 +- 3 files changed, 1620 insertions(+), 708 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 8bc3497ad3c3..6233ce743b70 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -1,20 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-GI +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) { -; CHECK-CVT-LABEL: add_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: add_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: add_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fadd v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: add_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fadd <4 x half> %a, %b @@ -22,28 +32,54 @@ entry: } define <4 x half> @build_h4(<4 x half> %a) { -; CHECK-COMMON-LABEL: build_h4: -; CHECK-COMMON: // %bb.0: // %entry -; CHECK-COMMON-NEXT: mov w8, #15565 // =0x3ccd -; CHECK-COMMON-NEXT: dup v0.4h, w8 -; CHECK-COMMON-NEXT: ret +; CHECK-CVT-SD-LABEL: build_h4: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: mov w8, #15565 // =0x3ccd +; CHECK-CVT-SD-NEXT: dup v0.4h, w8 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: build_h4: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: mov w8, #15565 // =0x3ccd +; CHECK-FP16-SD-NEXT: dup v0.4h, w8 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: build_h4: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI1_0 +; CHECK-CVT-GI-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: build_h4: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI1_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] +; CHECK-FP16-GI-NEXT: ret entry: ret <4 x half> } define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) { -; CHECK-CVT-LABEL: sub_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fsub v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sub_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fsub v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sub_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fsub v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sub_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fsub v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fsub <4 x half> %a, %b @@ -51,18 +87,26 @@ entry: } define <4 x half> @mul_h(<4 x half> %a, <4 x half> %b) { -; CHECK-CVT-LABEL: mul_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: mul_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: mul_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: mul_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fmul <4 x half> %a, %b @@ -70,18 +114,26 @@ entry: } define <4 x half> @div_h(<4 x half> %a, <4 x half> %b) { -; CHECK-CVT-LABEL: div_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fdiv v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: div_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fdiv v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: div_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fdiv v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: div_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fdiv v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fdiv <4 x half> %a, %b @@ -89,92 +141,162 @@ entry: } define <4 x half> @load_h(ptr %a) { -; CHECK-COMMON-LABEL: load_h: -; CHECK-COMMON: // %bb.0: // %entry -; CHECK-COMMON-NEXT: ldr d0, [x0] -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: load_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret entry: %0 = load <4 x half>, ptr %a, align 4 ret <4 x half> %0 } define void @store_h(ptr %a, <4 x half> %b) { -; CHECK-COMMON-LABEL: store_h: -; CHECK-COMMON: // %bb.0: // %entry -; CHECK-COMMON-NEXT: str d0, [x0] -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: store_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: store <4 x half> %b, ptr %a, align 4 ret void } define <4 x half> @s_to_h(<4 x float> %a) { -; CHECK-COMMON-LABEL: s_to_h: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: s_to_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = fptrunc <4 x float> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @d_to_h(<4 x double> %a) { -; CHECK-COMMON-LABEL: d_to_h: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-COMMON-NEXT: fcvtxn2 v0.4s, v1.2d -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-CVT-SD-LABEL: d_to_h: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: d_to_h: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: d_to_h: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mov d2, v0.d[1] +; CHECK-CVT-GI-NEXT: fcvt h0, d0 +; CHECK-CVT-GI-NEXT: mov d3, v1.d[1] +; CHECK-CVT-GI-NEXT: fcvt h1, d1 +; CHECK-CVT-GI-NEXT: fcvt h2, d2 +; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-CVT-GI-NEXT: fcvt h2, d3 +; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0] +; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: d_to_h: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mov d2, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvt h0, d0 +; CHECK-FP16-GI-NEXT: mov d3, v1.d[1] +; CHECK-FP16-GI-NEXT: fcvt h1, d1 +; CHECK-FP16-GI-NEXT: fcvt h2, d2 +; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-FP16-GI-NEXT: fcvt h2, d3 +; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-FP16-GI-NEXT: ret %1 = fptrunc <4 x double> %a to <4 x half> ret <4 x half> %1 } define <4 x float> @h_to_s(<4 x half> %a) { -; CHECK-COMMON-LABEL: h_to_s: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fcvtl v0.4s, v0.4h -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: h_to_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: ret %1 = fpext <4 x half> %a to <4 x float> ret <4 x float> %1 } define <4 x double> @h_to_d(<4 x half> %a) { -; CHECK-COMMON-LABEL: h_to_d: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fcvtl v0.4s, v0.4h -; CHECK-COMMON-NEXT: fcvtl2 v1.2d, v0.4s -; CHECK-COMMON-NEXT: fcvtl v0.2d, v0.2s -; CHECK-COMMON-NEXT: ret +; CHECK-CVT-SD-LABEL: h_to_d: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-CVT-SD-NEXT: fcvtl v0.2d, v0.2s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: h_to_d: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-SD-NEXT: fcvtl2 v1.2d, v0.4s +; CHECK-FP16-SD-NEXT: fcvtl v0.2d, v0.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: h_to_d: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] +; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] +; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] +; CHECK-CVT-GI-NEXT: fcvt d0, h0 +; CHECK-CVT-GI-NEXT: fcvt d4, h1 +; CHECK-CVT-GI-NEXT: fcvt d1, h2 +; CHECK-CVT-GI-NEXT: fcvt d2, h3 +; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0] +; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: h_to_d: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] +; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] +; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvt d4, h1 +; CHECK-FP16-GI-NEXT: fcvt d1, h2 +; CHECK-FP16-GI-NEXT: fcvt d2, h3 +; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-FP16-GI-NEXT: ret %1 = fpext <4 x half> %a to <4 x double> ret <4 x double> %1 } define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) { -; CHECK-COMMON-LABEL: bitcast_i_to_h: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fmov d0, d1 -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: bitcast_i_to_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret %2 = bitcast <4 x i16> %a to <4 x half> ret <4 x half> %2 } define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) { -; CHECK-COMMON-LABEL: bitcast_h_to_i: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: fmov d0, d1 -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: bitcast_h_to_i: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret %2 = bitcast <4 x half> %a to <4 x i16> ret <4 x i16> %2 } define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { -; CHECK-CVT-LABEL: sitofp_i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-CVT-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-CVT-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-CVT-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_i8: ; CHECK-FP16: // %bb.0: @@ -182,6 +304,15 @@ define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { ; CHECK-FP16-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-CVT-GI-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-CVT-GI-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = sitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } @@ -204,43 +335,59 @@ define <4 x half> @sitofp_i16(<4 x i16> %a) #0 { define <4 x half> @sitofp_i32(<4 x i32> %a) #0 { -; CHECK-COMMON-LABEL: sitofp_i32: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: scvtf v0.4s, v0.4s -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: sitofp_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = sitofp <4 x i32> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @sitofp_i64(<4 x i64> %a) #0 { -; CHECK-COMMON-LABEL: sitofp_i64: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: scvtf v0.2d, v0.2d -; CHECK-COMMON-NEXT: scvtf v1.2d, v1.2d -; CHECK-COMMON-NEXT: fcvtn v0.2s, v0.2d -; CHECK-COMMON-NEXT: fcvtn2 v0.4s, v1.2d -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: sitofp_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v1.2d, v1.2d +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = sitofp <4 x i64> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i8(<4 x i8> %a) #0 { -; CHECK-CVT-LABEL: uitofp_i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-CVT-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: uitofp_i8: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: uitofp_i8: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-FP16-SD-NEXT: ucvtf v0.4h, v0.4h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-CVT-GI-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: uitofp_i8: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-FP16-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-FP16-GI-NEXT: ucvtf v0.4h, v0.4h +; CHECK-FP16-GI-NEXT: ret %1 = uitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } @@ -264,35 +411,35 @@ define <4 x half> @uitofp_i16(<4 x i16> %a) #0 { define <4 x half> @uitofp_i32(<4 x i32> %a) #0 { -; CHECK-COMMON-LABEL: uitofp_i32: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: ucvtf v0.4s, v0.4s -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: uitofp_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf v0.4s, v0.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = uitofp <4 x i32> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i64(<4 x i64> %a) #0 { -; CHECK-COMMON-LABEL: uitofp_i64: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: ucvtf v0.2d, v0.2d -; CHECK-COMMON-NEXT: ucvtf v1.2d, v1.2d -; CHECK-COMMON-NEXT: fcvtn v0.2s, v0.2d -; CHECK-COMMON-NEXT: fcvtn2 v0.4s, v1.2d -; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: uitofp_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ucvtf v1.2d, v1.2d +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = uitofp <4 x i64> %a to <4 x half> ret <4 x half> %1 } define void @test_insert_at_zero(half %a, ptr %b) #0 { -; CHECK-COMMON-LABEL: test_insert_at_zero: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: // kill: def $h0 killed $h0 def $d0 -; CHECK-COMMON-NEXT: str d0, [x0] -; CHECK-COMMON-NEXT: ret +; CHECK-LABEL: test_insert_at_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret %1 = insertelement <4 x half> undef, half %a, i64 0 store <4 x half> %1, ptr %b, align 4 ret void @@ -331,17 +478,29 @@ define <4 x i16> @fptosi_i16(<4 x half> %a) #0 { } define <4 x i8> @fptoui_i8(<4 x half> %a) #0 { -; CHECK-CVT-LABEL: fptoui_i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: fptoui_i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: fptoui_i8: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: fptoui_i8: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: fptoui_i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: fptoui_i8: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-GI-NEXT: ret ; NOTE: fcvtzs selected here because the xtn shaves the sign bit %1 = fptoui<4 x half> %a to <4 x i8> ret <4 x i8> %1 @@ -364,36 +523,45 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { } define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_une: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmeq v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_une: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp une <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ueq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: ; CHECK-FP16: // %bb.0: @@ -402,102 +570,149 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ueq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ueq <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ugt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmge v0.4h, v1.4h, v0.4h ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ugt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ugt <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_uge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_uge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp uge <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ult: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmge v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ult: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ult <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ule: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmgt v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ule: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ule <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mvn v0.8b, v0.8b -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_uno: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: ; CHECK-FP16: // %bb.0: @@ -506,21 +721,32 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-FP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_uno: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp uno <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_one: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_one: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: ; CHECK-FP16: // %bb.0: @@ -528,60 +754,94 @@ define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h ; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_one: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp one <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_oeq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmeq v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oeq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp oeq <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ogt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmgt v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ogt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ogt <4 x half> %a, %b ret <4 x i1> %1 } define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_oge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmge v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp oge <4 x half> %a, %b ret <4 x i1> %1 @@ -624,15 +884,15 @@ define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { } define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: xtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ord: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: ; CHECK-FP16: // %bb.0: @@ -640,6 +900,16 @@ define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h ; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ord: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ord <4 x half> %a, %b ret <4 x i1> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index fcb42a74ce69..86763eb5f9e3 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -1,24 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-GI +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) { -; CHECK-CVT-LABEL: add_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fadd v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fadd v1.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: add_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-SD-NEXT: fadd v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fadd v1.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: add_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: add_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fadd v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fadd v1.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fadd <8 x half> %a, %b ret <8 x half> %0 @@ -26,22 +40,34 @@ entry: define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) { -; CHECK-CVT-LABEL: sub_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fsub v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fsub v1.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sub_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-SD-NEXT: fsub v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fsub v1.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sub_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sub_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fsub v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fsub v1.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fsub <8 x half> %a, %b ret <8 x half> %0 @@ -49,22 +75,34 @@ entry: define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) { -; CHECK-CVT-LABEL: mul_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fmul v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fmul v1.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: mul_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-SD-NEXT: fmul v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: mul_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: mul_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fmul v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fmul <8 x half> %a, %b ret <8 x half> %0 @@ -72,22 +110,34 @@ entry: define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) { -; CHECK-CVT-LABEL: div_h: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-CVT-NEXT: fdiv v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fdiv v1.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: div_h: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-SD-NEXT: fdiv v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: div_h: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fdiv v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: div_h: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fdiv v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fdiv v1.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-GI-NEXT: ret entry: %0 = fdiv <8 x half> %a, %b ret <8 x half> %0 @@ -126,39 +176,171 @@ define <8 x half> @s_to_h(<8 x float> %a) { } define <8 x half> @d_to_h(<8 x double> %a) { -; CHECK-LABEL: d_to_h: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-NEXT: fcvtxn v2.2s, v2.2d -; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d -; CHECK-NEXT: fcvtxn2 v2.4s, v3.2d -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn2 v0.8h, v2.4s -; CHECK-NEXT: ret +; CHECK-CVT-SD-LABEL: d_to_h: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-CVT-SD-NEXT: fcvtxn v2.2s, v2.2d +; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-CVT-SD-NEXT: fcvtxn2 v2.4s, v3.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: d_to_h: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-FP16-SD-NEXT: fcvtxn v2.2s, v2.2d +; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-FP16-SD-NEXT: fcvtxn2 v2.4s, v3.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: d_to_h: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mov d4, v0.d[1] +; CHECK-CVT-GI-NEXT: fcvt h0, d0 +; CHECK-CVT-GI-NEXT: mov d5, v1.d[1] +; CHECK-CVT-GI-NEXT: fcvt h1, d1 +; CHECK-CVT-GI-NEXT: fcvt h4, d4 +; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0] +; CHECK-CVT-GI-NEXT: fcvt h4, d5 +; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-CVT-GI-NEXT: mov d1, v2.d[1] +; CHECK-CVT-GI-NEXT: fcvt h2, d2 +; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0] +; CHECK-CVT-GI-NEXT: fcvt h1, d1 +; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0] +; CHECK-CVT-GI-NEXT: mov d2, v3.d[1] +; CHECK-CVT-GI-NEXT: fcvt h3, d3 +; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0] +; CHECK-CVT-GI-NEXT: fcvt h1, d2 +; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0] +; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: d_to_h: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mov d4, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvt h0, d0 +; CHECK-FP16-GI-NEXT: mov d5, v1.d[1] +; CHECK-FP16-GI-NEXT: fcvt h1, d1 +; CHECK-FP16-GI-NEXT: fcvt h4, d4 +; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0] +; CHECK-FP16-GI-NEXT: fcvt h4, d5 +; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-FP16-GI-NEXT: mov d1, v2.d[1] +; CHECK-FP16-GI-NEXT: fcvt h2, d2 +; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] +; CHECK-FP16-GI-NEXT: fcvt h1, d1 +; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0] +; CHECK-FP16-GI-NEXT: mov d2, v3.d[1] +; CHECK-FP16-GI-NEXT: fcvt h3, d3 +; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0] +; CHECK-FP16-GI-NEXT: fcvt h1, d2 +; CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0] +; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-FP16-GI-NEXT: ret %1 = fptrunc <8 x double> %a to <8 x half> ret <8 x half> %1 } define <8 x float> @h_to_s(<8 x half> %a) { -; CHECK-LABEL: h_to_s: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: ret +; CHECK-CVT-SD-LABEL: h_to_s: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: h_to_s: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: h_to_s: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-GI-NEXT: mov v0.16b, v2.16b +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: h_to_s: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-FP16-GI-NEXT: mov v0.16b, v2.16b +; CHECK-FP16-GI-NEXT: ret %1 = fpext <8 x half> %a to <8 x float> ret <8 x float> %1 } define <8 x double> @h_to_d(<8 x half> %a) { -; CHECK-LABEL: h_to_d: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-NEXT: fcvtl v0.2d, v1.2s -; CHECK-NEXT: fcvtl2 v3.2d, v2.4s -; CHECK-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-NEXT: fcvtl v2.2d, v2.2s -; CHECK-NEXT: ret +; CHECK-CVT-SD-LABEL: h_to_d: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-SD-NEXT: fcvtl2 v3.2d, v2.4s +; CHECK-CVT-SD-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-CVT-SD-NEXT: fcvtl v2.2d, v2.2s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: h_to_d: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-FP16-SD-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-SD-NEXT: fcvtl2 v3.2d, v2.4s +; CHECK-FP16-SD-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-SD-NEXT: fcvtl v2.2d, v2.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: h_to_d: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] +; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] +; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] +; CHECK-CVT-GI-NEXT: mov h4, v0.h[4] +; CHECK-CVT-GI-NEXT: mov h5, v0.h[5] +; CHECK-CVT-GI-NEXT: mov h6, v0.h[6] +; CHECK-CVT-GI-NEXT: mov h7, v0.h[7] +; CHECK-CVT-GI-NEXT: fcvt d0, h0 +; CHECK-CVT-GI-NEXT: fcvt d16, h1 +; CHECK-CVT-GI-NEXT: fcvt d1, h2 +; CHECK-CVT-GI-NEXT: fcvt d17, h3 +; CHECK-CVT-GI-NEXT: fcvt d2, h4 +; CHECK-CVT-GI-NEXT: fcvt d4, h5 +; CHECK-CVT-GI-NEXT: fcvt d3, h6 +; CHECK-CVT-GI-NEXT: fcvt d5, h7 +; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0] +; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0] +; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0] +; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: h_to_d: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] +; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] +; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] +; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] +; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] +; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] +; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvt d16, h1 +; CHECK-FP16-GI-NEXT: fcvt d1, h2 +; CHECK-FP16-GI-NEXT: fcvt d17, h3 +; CHECK-FP16-GI-NEXT: fcvt d2, h4 +; CHECK-FP16-GI-NEXT: fcvt d4, h5 +; CHECK-FP16-GI-NEXT: fcvt d3, h6 +; CHECK-FP16-GI-NEXT: fcvt d5, h7 +; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0] +; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0] +; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-FP16-GI-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 } @@ -183,14 +365,14 @@ define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) { } define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 { -; CHECK-CVT-LABEL: sitofp_v4i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-CVT-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_v4i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-CVT-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-CVT-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_v4i8: ; CHECK-FP16: // %bb.0: @@ -198,76 +380,132 @@ define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 { ; CHECK-FP16-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_v4i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-CVT-GI-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-CVT-GI-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret %1 = sitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 { -; CHECK-CVT-LABEL: sitofp_v8i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-CVT-NEXT: sshll v1.4s, v0.4h, #0 -; CHECK-CVT-NEXT: sshll2 v2.4s, v0.8h, #0 -; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s -; CHECK-CVT-NEXT: scvtf v1.4s, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_v8i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-CVT-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-SD-NEXT: scvtf v1.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_v8i8: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_v8i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-CVT-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: scvtf v2.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret %1 = sitofp <8 x i8> %a to <8 x half> ret <8 x half> %1 } define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 { -; CHECK-CVT-LABEL: sitofp_v16i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-CVT-NEXT: sshll v2.4s, v1.4h, #0 -; CHECK-CVT-NEXT: sshll v3.4s, v0.4h, #0 -; CHECK-CVT-NEXT: sshll2 v4.4s, v1.8h, #0 -; CHECK-CVT-NEXT: sshll2 v5.4s, v0.8h, #0 -; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s -; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s -; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s -; CHECK-CVT-NEXT: scvtf v2.4s, v4.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s -; CHECK-CVT-NEXT: scvtf v3.4s, v5.4s -; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_v16i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-CVT-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-CVT-SD-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-CVT-SD-NEXT: sshll v3.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: sshll2 v4.4s, v1.8h, #0 +; CHECK-CVT-SD-NEXT: sshll2 v5.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: scvtf v2.4s, v2.4s +; CHECK-CVT-SD-NEXT: scvtf v3.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcvtn v1.4h, v2.4s +; CHECK-CVT-SD-NEXT: scvtf v2.4s, v4.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v3.4s +; CHECK-CVT-SD-NEXT: scvtf v3.4s, v5.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v1.8h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v3.4s +; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: sitofp_v16i8: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h -; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: sitofp_v16i8: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-FP16-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-FP16-SD-NEXT: scvtf v1.8h, v1.8h +; CHECK-FP16-SD-NEXT: scvtf v0.8h, v0.8h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_v16i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-CVT-GI-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-CVT-GI-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-CVT-GI-NEXT: sshll v3.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: scvtf v2.4s, v2.4s +; CHECK-CVT-GI-NEXT: scvtf v3.4s, v3.4s +; CHECK-CVT-GI-NEXT: scvtf v4.4s, v1.4s +; CHECK-CVT-GI-NEXT: scvtf v5.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn v1.4h, v3.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: sitofp_v16i8: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-FP16-GI-NEXT: sshll2 v2.8h, v0.16b, #0 +; CHECK-FP16-GI-NEXT: scvtf v0.8h, v1.8h +; CHECK-FP16-GI-NEXT: scvtf v1.8h, v2.8h +; CHECK-FP16-GI-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 } define <8 x half> @sitofp_i16(<8 x i16> %a) #0 { -; CHECK-CVT-LABEL: sitofp_i16: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: sshll v1.4s, v0.4h, #0 -; CHECK-CVT-NEXT: sshll2 v2.4s, v0.8h, #0 -; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s -; CHECK-CVT-NEXT: scvtf v1.4s, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_i16: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-SD-NEXT: scvtf v1.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_i16: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_i16: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: scvtf v2.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret %1 = sitofp <8 x i16> %a to <8 x half> ret <8 x half> %1 } @@ -286,108 +524,213 @@ define <8 x half> @sitofp_i32(<8 x i32> %a) #0 { define <8 x half> @sitofp_i64(<8 x i64> %a) #0 { -; CHECK-LABEL: sitofp_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v3.2d, v3.2d -; CHECK-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NEXT: fcvtn v2.2s, v2.2d -; CHECK-NEXT: fcvtn2 v0.4s, v1.2d -; CHECK-NEXT: fcvtn2 v2.4s, v3.2d -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn2 v0.8h, v2.4s -; CHECK-NEXT: ret +; CHECK-CVT-SD-LABEL: sitofp_i64: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: scvtf v0.2d, v0.2d +; CHECK-CVT-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-CVT-SD-NEXT: scvtf v1.2d, v1.2d +; CHECK-CVT-SD-NEXT: scvtf v3.2d, v3.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.2s, v0.2d +; CHECK-CVT-SD-NEXT: fcvtn v2.2s, v2.2d +; CHECK-CVT-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-CVT-SD-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: sitofp_i64: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: scvtf v0.2d, v0.2d +; CHECK-FP16-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-FP16-SD-NEXT: scvtf v1.2d, v1.2d +; CHECK-FP16-SD-NEXT: scvtf v3.2d, v3.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.2s, v0.2d +; CHECK-FP16-SD-NEXT: fcvtn v2.2s, v2.2d +; CHECK-FP16-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FP16-SD-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: sitofp_i64: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: scvtf v0.2d, v0.2d +; CHECK-CVT-GI-NEXT: scvtf v1.2d, v1.2d +; CHECK-CVT-GI-NEXT: scvtf v2.2d, v2.2d +; CHECK-CVT-GI-NEXT: scvtf v3.2d, v3.2d +; CHECK-CVT-GI-NEXT: fcvtn v0.2s, v0.2d +; CHECK-CVT-GI-NEXT: fcvtn v2.2s, v2.2d +; CHECK-CVT-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-CVT-GI-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: sitofp_i64: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: scvtf v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: scvtf v2.2d, v2.2d +; CHECK-FP16-GI-NEXT: scvtf v3.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d +; CHECK-FP16-GI-NEXT: fcvtn v2.2s, v2.2d +; CHECK-FP16-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FP16-GI-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-FP16-GI-NEXT: ret %1 = sitofp <8 x i64> %a to <8 x half> ret <8 x half> %1 } define <4 x half> @uitofp_v4i8(<4 x i8> %a) #0 { -; CHECK-CVT-LABEL: uitofp_v4i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_v4i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-CVT-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: uitofp_v4i8: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: uitofp_v4i8: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-FP16-SD-NEXT: ucvtf v0.4h, v0.4h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_v4i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-CVT-GI-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: uitofp_v4i8: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-FP16-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-FP16-GI-NEXT: ucvtf v0.4h, v0.4h +; CHECK-FP16-GI-NEXT: ret %1 = uitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 { -; CHECK-CVT-LABEL: uitofp_v8i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-CVT-NEXT: ushll v1.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ushll2 v2.4s, v0.8h, #0 -; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s -; CHECK-CVT-NEXT: ucvtf v1.4s, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_v8i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-CVT-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: uitofp_v8i8: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_v8i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-CVT-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret %1 = uitofp <8 x i8> %a to <8 x half> ret <8 x half> %1 } define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 { -; CHECK-CVT-LABEL: uitofp_v16i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0 -; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-CVT-NEXT: ushll v2.4s, v1.4h, #0 -; CHECK-CVT-NEXT: ushll v3.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ushll2 v4.4s, v1.8h, #0 -; CHECK-CVT-NEXT: ushll2 v5.4s, v0.8h, #0 -; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s -; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s -; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s -; CHECK-CVT-NEXT: ucvtf v2.4s, v4.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s -; CHECK-CVT-NEXT: ucvtf v3.4s, v5.4s -; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_v16i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-CVT-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-CVT-SD-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-CVT-SD-NEXT: ushll v3.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: ushll2 v4.4s, v1.8h, #0 +; CHECK-CVT-SD-NEXT: ushll2 v5.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: ucvtf v2.4s, v2.4s +; CHECK-CVT-SD-NEXT: ucvtf v3.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcvtn v1.4h, v2.4s +; CHECK-CVT-SD-NEXT: ucvtf v2.4s, v4.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v3.4s +; CHECK-CVT-SD-NEXT: ucvtf v3.4s, v5.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v1.8h, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v3.4s +; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: uitofp_v16i8: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0 -; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h -; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: uitofp_v16i8: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-FP16-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-FP16-SD-NEXT: ucvtf v1.8h, v1.8h +; CHECK-FP16-SD-NEXT: ucvtf v0.8h, v0.8h +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_v16i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-CVT-GI-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-CVT-GI-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-CVT-GI-NEXT: ushll v3.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v2.4s +; CHECK-CVT-GI-NEXT: ucvtf v3.4s, v3.4s +; CHECK-CVT-GI-NEXT: ucvtf v4.4s, v1.4s +; CHECK-CVT-GI-NEXT: ucvtf v5.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s +; CHECK-CVT-GI-NEXT: fcvtn v1.4h, v3.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: uitofp_v16i8: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-FP16-GI-NEXT: ushll2 v2.8h, v0.16b, #0 +; CHECK-FP16-GI-NEXT: ucvtf v0.8h, v1.8h +; CHECK-FP16-GI-NEXT: ucvtf v1.8h, v2.8h +; CHECK-FP16-GI-NEXT: ret %1 = uitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 } define <8 x half> @uitofp_i16(<8 x i16> %a) #0 { -; CHECK-CVT-LABEL: uitofp_i16: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: ushll v1.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ushll2 v2.4s, v0.8h, #0 -; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s -; CHECK-CVT-NEXT: ucvtf v1.4s, v2.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_i16: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-CVT-SD-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: uitofp_i16: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_i16: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-CVT-GI-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret %1 = uitofp <8 x i16> %a to <8 x half> ret <8 x half> %1 } @@ -407,19 +750,61 @@ define <8 x half> @uitofp_i32(<8 x i32> %a) #0 { define <8 x half> @uitofp_i64(<8 x i64> %a) #0 { -; CHECK-LABEL: uitofp_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf v0.2d, v0.2d -; CHECK-NEXT: ucvtf v2.2d, v2.2d -; CHECK-NEXT: ucvtf v1.2d, v1.2d -; CHECK-NEXT: ucvtf v3.2d, v3.2d -; CHECK-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NEXT: fcvtn v2.2s, v2.2d -; CHECK-NEXT: fcvtn2 v0.4s, v1.2d -; CHECK-NEXT: fcvtn2 v2.4s, v3.2d -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn2 v0.8h, v2.4s -; CHECK-NEXT: ret +; CHECK-CVT-SD-LABEL: uitofp_i64: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-CVT-SD-NEXT: ucvtf v2.2d, v2.2d +; CHECK-CVT-SD-NEXT: ucvtf v1.2d, v1.2d +; CHECK-CVT-SD-NEXT: ucvtf v3.2d, v3.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.2s, v0.2d +; CHECK-CVT-SD-NEXT: fcvtn v2.2s, v2.2d +; CHECK-CVT-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-CVT-SD-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: uitofp_i64: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-FP16-SD-NEXT: ucvtf v2.2d, v2.2d +; CHECK-FP16-SD-NEXT: ucvtf v1.2d, v1.2d +; CHECK-FP16-SD-NEXT: ucvtf v3.2d, v3.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.2s, v0.2d +; CHECK-FP16-SD-NEXT: fcvtn v2.2s, v2.2d +; CHECK-FP16-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FP16-SD-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: uitofp_i64: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: ucvtf v0.2d, v0.2d +; CHECK-CVT-GI-NEXT: ucvtf v1.2d, v1.2d +; CHECK-CVT-GI-NEXT: ucvtf v2.2d, v2.2d +; CHECK-CVT-GI-NEXT: ucvtf v3.2d, v3.2d +; CHECK-CVT-GI-NEXT: fcvtn v0.2s, v0.2d +; CHECK-CVT-GI-NEXT: fcvtn v2.2s, v2.2d +; CHECK-CVT-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-CVT-GI-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: uitofp_i64: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: ucvtf v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: ucvtf v2.2d, v2.2d +; CHECK-FP16-GI-NEXT: ucvtf v3.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d +; CHECK-FP16-GI-NEXT: fcvtn v2.2s, v2.2d +; CHECK-FP16-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FP16-GI-NEXT: fcvtn2 v2.4s, v3.2d +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-GI-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-FP16-GI-NEXT: ret %1 = uitofp <8 x i64> %a to <8 x half> ret <8 x half> %1 } @@ -436,94 +821,132 @@ define void @test_insert_at_zero(half %a, ptr %b) #0 { } define <8 x i8> @fptosi_i8(<8 x half> %a) #0 { -; CHECK-CVT-LABEL: fptosi_i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: fptosi_i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: fptosi_i8: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: fptosi_i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fptosi<8 x half> %a to <8 x i8> ret <8 x i8> %1 } define <8 x i16> @fptosi_i16(<8 x half> %a) #0 { -; CHECK-CVT-LABEL: fptosi_i16: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: fptosi_i16: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: fptosi_i16: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: fptosi_i16: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fptosi<8 x half> %a to <8 x i16> ret <8 x i16> %1 } define <8 x i8> @fptoui_i8(<8 x half> %a) #0 { -; CHECK-CVT-LABEL: fptoui_i8: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: fptoui_i8: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: fptoui_i8: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: fptoui_i8: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fptoui<8 x half> %a to <8 x i8> ret <8 x i8> %1 } define <8 x i16> @fptoui_i16(<8 x half> %a) #0 { -; CHECK-CVT-LABEL: fptoui_i16: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: fptoui_i16: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: fptoui_i16: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: fptoui_i16: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fptoui<8 x half> %a to <8 x i16> ret <8 x i16> %1 } define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmeq v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_une: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmeq v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: ; CHECK-FP16: // %bb.0: @@ -531,27 +954,41 @@ define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_une: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmeq v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp une <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v4.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: fcmgt v3.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ueq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v4.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcmgt v3.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: ; CHECK-FP16: // %bb.0: @@ -561,23 +998,41 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ueq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v4.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmgt v3.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-GI-NEXT: mvn v1.16b, v1.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ueq <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ugt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: ; CHECK-FP16: // %bb.0: @@ -585,23 +1040,37 @@ define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ugt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ugt <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_uge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: ; CHECK-FP16: // %bb.0: @@ -609,23 +1078,37 @@ define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_uge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp uge <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ult: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: ; CHECK-FP16: // %bb.0: @@ -633,23 +1116,37 @@ define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ult: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ult <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ule: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: ; CHECK-FP16: // %bb.0: @@ -657,27 +1154,41 @@ define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ule: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ule <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v4.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: fcmge v3.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: mvn v0.16b, v0.16b -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_uno: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v4.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcmge v3.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: ; CHECK-FP16: // %bb.0: @@ -687,26 +1198,44 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: mvn v0.16b, v0.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_uno: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v4.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmge v3.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-GI-NEXT: mvn v1.16b, v1.16b +; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp uno <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_one: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v4.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: fcmgt v3.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_one: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v4.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcmgt v3.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: ; CHECK-FP16: // %bb.0: @@ -715,136 +1244,212 @@ define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_one: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v4.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmgt v3.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp one <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmeq v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_oeq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmeq v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmeq v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oeq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmeq v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp oeq <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ogt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmgt v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ogt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ogt <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_oge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmge v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp oge <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_olt: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_olt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_olt: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_olt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp olt <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ole: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ole: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ole: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcmge v0.8h, v1.8h, v0.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ole: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ole <8 x half> %a, %b ret <8 x i1> %1 } define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 { -; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcmge v4.4s, v3.4s, v2.4s -; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: fcmge v3.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h -; CHECK-CVT-NEXT: ret +; CHECK-CVT-SD-LABEL: test_fcmp_ord: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: fcmge v4.4s, v3.4s, v2.4s +; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s +; CHECK-CVT-SD-NEXT: fcmge v3.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-SD-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: ; CHECK-FP16: // %bb.0: @@ -853,8 +1458,27 @@ define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ord: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-CVT-GI-NEXT: fcmge v4.4s, v2.4s, v3.4s +; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s +; CHECK-CVT-GI-NEXT: fcmge v3.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b +; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-GI-NEXT: ret %1 = fcmp ord <8 x half> %a, %b ret <8 x i1> %1 } attributes #0 = { nounwind } + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-CVT: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll index bb7cd22c01b4..c433291ff576 100644 --- a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll +++ b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll @@ -1,31 +1,40 @@ -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for unused_div_fpexcept_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for unused_div_round_dynamic +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_twice_fpexcept_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_twice_round_dynamic +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding_fpexcept_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding_round_dynamic ; Div whose result is unused should be removed unless we have strict exceptions -; CHECK-LABEL: unused_div: -; CHECK-NOT: fdiv -; CHECK: ret define void @unused_div(float %x, float %y) { +; CHECK-LABEL: unused_div: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ret entry: %add = fdiv float %x, %y ret void } -; CHECK-LABEL: unused_div_fpexcept_strict: -; CHECK: fdiv s0, s0, s1 -; CHECK-NEXT: ret define void @unused_div_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_fpexcept_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: ret entry: %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret void } -; CHECK-LABEL: unused_div_round_dynamic: -; CHECK-NOT: fdiv -; CHECK: ret define void @unused_div_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_round_dynamic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ret entry: %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 ret void @@ -33,14 +42,14 @@ entry: ; Machine CSE should eliminate the second add unless we have strict exceptions - -; CHECK-LABEL: add_twice: -; CHECK: fadd [[ADD:s[0-9]+]], s0, s1 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: fmul [[MUL:s[0-9]+]], [[ADD]], [[ADD]] -; CHECK-NEXT: fcsel s0, [[ADD]], [[MUL]], eq -; CHECK-NEXT: ret define float @add_twice(float %x, float %y, i32 %n) { +; CHECK-LABEL: add_twice: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: fmul s1, s0, s0 +; CHECK-NEXT: fcsel s0, s0, s1, eq +; CHECK-NEXT: ret entry: %add = fadd float %x, %y %tobool.not = icmp eq i32 %n, 0 @@ -56,15 +65,17 @@ if.end: ret float %a.0 } -; CHECK-LABEL: add_twice_fpexcept_strict: -; CHECK: fmov [[X:s[0-9]+]], s0 -; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: cbz w0, [[LABEL:.LBB[0-9_]+]] -; CHECK: fadd [[ADD:s[0-9]+]], [[X]], s1 -; CHECK-NEXT: fmul s0, s0, [[ADD]] -; CHECK: [[LABEL]]: -; CHECK-NEXT: ret define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_fpexcept_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, s0 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: cbz w0, .LBB4_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: fadd s1, s2, s1 +; CHECK-NEXT: fmul s0, s0, s1 +; CHECK-NEXT: .LBB4_2: // %if.end +; CHECK-NEXT: ret entry: %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 %tobool.not = icmp eq i32 %n, 0 @@ -80,14 +91,15 @@ if.end: ret float %a.0 } -; CHECK-LABEL: add_twice_round_dynamic: -; CHECK: fadd s0, s0, s1 -; CHECK-NEXT: cbz w0, [[LABEL:.LBB[0-9_]+]] -; CHECK-NOT: fadd -; CHECK: fmul s0, s0, s0 -; CHECK: [[LABEL]]: -; CHECK-NEXT: ret define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_round_dynamic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: cbz w0, .LBB5_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: fmul s0, s0, s0 +; CHECK-NEXT: .LBB5_2: // %if.end +; CHECK-NEXT: ret entry: %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 %tobool.not = icmp eq i32 %n, 0 @@ -108,17 +120,18 @@ if.end: ; dynamic (as they may give different results) or when we have strict exceptions ; (the llvm.set.rounding is irrelevant, but both could trap). -; CHECK-LABEL: set_rounding: -; CHECK-DAG: fadd [[SREG:s[0-9]+]], s0, s1 -; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR -; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 -; CHECK: msr FPCR, [[XREG2]] -; CHECK-NEXT: mrs [[XREG3:x[0-9]+]], FPCR -; CHECK-NEXT: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff -; CHECK-NEXT: msr FPCR, [[XREG4]] -; CHECK-NEXT: fsub s0, [[SREG]], [[SREG]] -; CHECK-NEXT: ret define float @set_rounding(float %x, float %y) { +; CHECK-LABEL: set_rounding: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: orr x8, x8, #0xc00000 +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: fsub s0, s0, s0 +; CHECK-NEXT: ret entry: %add1 = fadd float %x, %y call void @llvm.set.rounding(i32 0) @@ -128,18 +141,19 @@ entry: ret float %sub } -; CHECK-LABEL: set_rounding_fpexcept_strict: -; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1 -; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR -; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 -; CHECK: msr FPCR, [[XREG2]] -; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1 -; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR -; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff -; CHECK-NEXT: msr FPCR, [[XREG4]] -; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]] -; CHECK-NEXT: ret define float @set_rounding_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_fpexcept_strict: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fadd s2, s0, s1 +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: orr x8, x8, #0xc00000 +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: fsub s0, s2, s0 +; CHECK-NEXT: ret entry: %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 call void @llvm.set.rounding(i32 0) #0 @@ -149,18 +163,19 @@ entry: ret float %sub } -; CHECK-LABEL: set_rounding_round_dynamic: -; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1 -; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR -; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 -; CHECK: msr FPCR, [[XREG2]] -; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1 -; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR -; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff -; CHECK-NEXT: msr FPCR, [[XREG4]] -; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]] -; CHECK-NEXT: ret define float @set_rounding_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_round_dynamic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: fadd s2, s0, s1 +; CHECK-NEXT: orr x8, x8, #0xc00000 +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, x8 +; CHECK-NEXT: fsub s0, s2, s0 +; CHECK-NEXT: ret entry: %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 call void @llvm.set.rounding(i32 0) #0 @@ -178,3 +193,6 @@ declare i32 @llvm.get.rounding() declare void @llvm.set.rounding(i32) attributes #0 = { strictfp } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}}