llvm-project/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
David Green 1729e6e742
[AArch64] Improve bf16 fp_extend lowering. (#118966)
A bf16 fp_extend is just a shift into the higher bits. This changes the
lowering from a relatively ugly tablegen pattern to having ISel generate
the shift using an extended vector, which is cleaner and should optimize
better. StrictFP goes through the same route, since the shift cannot
round or set flags.
2025-01-07 11:43:14 +00:00
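
The extend-by-shift described above can be seen on a standalone fpext. As a
minimal sketch (the function name and the exact output below are illustrative,
not part of the committed test):

define float @bf16_fpext(bfloat %x) {
  %e = fpext bfloat %x to float
  ret float %e
}

; With this change, ISel emits the extend as a 16-bit left shift on an
; extended vector, roughly:
;   shll v0.4s, v0.4h, #16

This works because a bf16 value is exactly the top 16 bits of the
corresponding f32 encoding, so widening is a pure bit move that cannot round
or raise exceptions.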

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -o - %s -mattr=+neon,+fullfp16 | FileCheck %s
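
; The half/float/double round-trips below fold to a fcvtzs/scvtf (or
; fcvtzu/ucvtf) pair directly on the FP register, avoiding a round trip
; through a GPR.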
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
}

define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs s0, s0
; CHECK-NEXT: scvtf s0, s0
; CHECK-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
ret float %conv1
}

define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs h0, h0
; CHECK-NEXT: scvtf h0, h0
; CHECK-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
ret half %conv1
}

define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
ret double %conv1
}

define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu s0, s0
; CHECK-NEXT: ucvtf s0, s0
; CHECK-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
ret float %conv1
}

define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu h0, h0
; CHECK-NEXT: ucvtf h0, h0
; CHECK-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
ret half %conv1
}
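
; The bf16 round-trips widen bf16 to f32 with the shift-based extend
; (shll ... #16), convert through the integer and back via f64, then narrow
; f64->f32 with fcvtxn (round-to-odd) so that the final integer sequence can
; round f32 to bf16 to nearest-even:
;   bf16 = (bits + 0x7fff + ((bits >> 16) & 1)) >> 16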
define bfloat @t7(bfloat %x) {
; CHECK-LABEL: t7:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-NEXT: mov w8, #32767 // =0x7fff
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: fcvtzs w9, s0
; CHECK-NEXT: scvtf d0, w9
; CHECK-NEXT: fcvtxn s0, d0
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: ubfx w10, w9, #16, #1
; CHECK-NEXT: add w8, w9, w8
; CHECK-NEXT: add w8, w10, w8
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
%conv = fptosi bfloat %x to i32
%conv1 = sitofp i32 %conv to bfloat
ret bfloat %conv1
}

define bfloat @t8(bfloat %x) {
; CHECK-LABEL: t8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-NEXT: mov w8, #32767 // =0x7fff
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: ucvtf d0, w9
; CHECK-NEXT: fcvtxn s0, d0
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: ubfx w10, w9, #16, #1
; CHECK-NEXT: add w8, w9, w8
; CHECK-NEXT: add w8, w10, w8
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
%conv = fptoui bfloat %x to i32
%conv1 = uitofp i32 %conv to bfloat
ret bfloat %conv1
}

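; The *_strict variants below exercise the constrained intrinsics under
; strictfp; they should select the same instructions as the default tests.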
define double @t1_strict(double %x) #0 {
; CHECK-LABEL: t1_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
entry:
%conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
%conv1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret double %conv1
}

define float @t2_strict(float %x) #0 {
; CHECK-LABEL: t2_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs s0, s0
; CHECK-NEXT: scvtf s0, s0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
%conv1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret float %conv1
}

define half @t3_strict(half %x) #0 {
; CHECK-LABEL: t3_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs h0, h0
; CHECK-NEXT: scvtf h0, h0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
%conv1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret half %conv1
}

define double @t4_strict(double %x) #0 {
; CHECK-LABEL: t4_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
entry:
%conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
%conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret double %conv1
}

define float @t5_strict(float %x) #0 {
; CHECK-LABEL: t5_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu s0, s0
; CHECK-NEXT: ucvtf s0, s0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
%conv1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret float %conv1
}

define half @t6_strict(half %x) #0 {
; CHECK-LABEL: t6_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu h0, h0
; CHECK-NEXT: ucvtf h0, h0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
%conv1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret half %conv1
}
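
; The strictfp bf16 variants take the same route: the shift-based extend
; cannot round or set floating-point status flags, so it is legal under
; strictfp.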
define bfloat @t7_strict(bfloat %x) #0 {
; CHECK-LABEL: t7_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-NEXT: mov w8, #32767 // =0x7fff
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: fcvtzs w9, s0
; CHECK-NEXT: scvtf d0, w9
; CHECK-NEXT: fcvtxn s0, d0
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: ubfx w10, w9, #16, #1
; CHECK-NEXT: add w8, w9, w8
; CHECK-NEXT: add w8, w10, w8
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
%conv1 = call bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret bfloat %conv1
}

define bfloat @t8_strict(bfloat %x) #0 {
; CHECK-LABEL: t8_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-NEXT: mov w8, #32767 // =0x7fff
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: ucvtf d0, w9
; CHECK-NEXT: fcvtxn s0, d0
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: ubfx w10, w9, #16, #1
; CHECK-NEXT: add w8, w9, w8
; CHECK-NEXT: add w8, w10, w8
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
%conv1 = call bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret bfloat %conv1
}

attributes #0 = { strictfp }

declare i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
declare bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32, metadata, metadata)
declare bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)