
; A bf16 fp_extend is just a shift into the higher bits. This changes the
; lowering from a relatively ugly tablegen pattern to ISel generating the
; shift using an extended vector, which is cleaner and should optimize
; better. StrictFP goes through the same route as it cannot round or set
; flags.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -o - %s -mattr=+neon,+fullfp16 | FileCheck %s
define double @t1(double %x) {
|
|
; CHECK-LABEL: t1:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs d0, d0
|
|
; CHECK-NEXT: scvtf d0, d0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptosi double %x to i64
|
|
%conv1 = sitofp i64 %conv to double
|
|
ret double %conv1
|
|
}
|
|
|
|
define float @t2(float %x) {
|
|
; CHECK-LABEL: t2:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs s0, s0
|
|
; CHECK-NEXT: scvtf s0, s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptosi float %x to i32
|
|
%conv1 = sitofp i32 %conv to float
|
|
ret float %conv1
|
|
}
|
|
|
|
define half @t3(half %x) {
|
|
; CHECK-LABEL: t3:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs h0, h0
|
|
; CHECK-NEXT: scvtf h0, h0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptosi half %x to i32
|
|
%conv1 = sitofp i32 %conv to half
|
|
ret half %conv1
|
|
}
|
|
|
|
define double @t4(double %x) {
|
|
; CHECK-LABEL: t4:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu d0, d0
|
|
; CHECK-NEXT: ucvtf d0, d0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptoui double %x to i64
|
|
%conv1 = uitofp i64 %conv to double
|
|
ret double %conv1
|
|
}
|
|
|
|
define float @t5(float %x) {
|
|
; CHECK-LABEL: t5:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu s0, s0
|
|
; CHECK-NEXT: ucvtf s0, s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptoui float %x to i32
|
|
%conv1 = uitofp i32 %conv to float
|
|
ret float %conv1
|
|
}
|
|
|
|
define half @t6(half %x) {
|
|
; CHECK-LABEL: t6:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu h0, h0
|
|
; CHECK-NEXT: ucvtf h0, h0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptoui half %x to i32
|
|
%conv1 = uitofp i32 %conv to half
|
|
ret half %conv1
|
|
}
|
|
|
|
define bfloat @t7(bfloat %x) {
|
|
; CHECK-LABEL: t7:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
|
|
; CHECK-NEXT: mov w8, #32767 // =0x7fff
|
|
; CHECK-NEXT: shll v0.4s, v0.4h, #16
|
|
; CHECK-NEXT: fcvtzs w9, s0
|
|
; CHECK-NEXT: scvtf d0, w9
|
|
; CHECK-NEXT: fcvtxn s0, d0
|
|
; CHECK-NEXT: fmov w9, s0
|
|
; CHECK-NEXT: ubfx w10, w9, #16, #1
|
|
; CHECK-NEXT: add w8, w9, w8
|
|
; CHECK-NEXT: add w8, w10, w8
|
|
; CHECK-NEXT: lsr w8, w8, #16
|
|
; CHECK-NEXT: fmov s0, w8
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptosi bfloat %x to i32
|
|
%conv1 = sitofp i32 %conv to bfloat
|
|
ret bfloat %conv1
|
|
}
|
|
|
|
define bfloat @t8(bfloat %x) {
|
|
; CHECK-LABEL: t8:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
|
|
; CHECK-NEXT: mov w8, #32767 // =0x7fff
|
|
; CHECK-NEXT: shll v0.4s, v0.4h, #16
|
|
; CHECK-NEXT: fcvtzu w9, s0
|
|
; CHECK-NEXT: ucvtf d0, w9
|
|
; CHECK-NEXT: fcvtxn s0, d0
|
|
; CHECK-NEXT: fmov w9, s0
|
|
; CHECK-NEXT: ubfx w10, w9, #16, #1
|
|
; CHECK-NEXT: add w8, w9, w8
|
|
; CHECK-NEXT: add w8, w10, w8
|
|
; CHECK-NEXT: lsr w8, w8, #16
|
|
; CHECK-NEXT: fmov s0, w8
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = fptoui bfloat %x to i32
|
|
%conv1 = uitofp i32 %conv to bfloat
|
|
ret bfloat %conv1
|
|
}
|
|
|
|
define double @t1_strict(double %x) #0 {
|
|
; CHECK-LABEL: t1_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs d0, d0
|
|
; CHECK-NEXT: scvtf d0, d0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret double %conv1
|
|
}
|
|
|
|
define float @t2_strict(float %x) #0 {
|
|
; CHECK-LABEL: t2_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs s0, s0
|
|
; CHECK-NEXT: scvtf s0, s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret float %conv1
|
|
}
|
|
|
|
define half @t3_strict(half %x) #0 {
|
|
; CHECK-LABEL: t3_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzs h0, h0
|
|
; CHECK-NEXT: scvtf h0, h0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret half %conv1
|
|
}
|
|
|
|
define double @t4_strict(double %x) #0 {
|
|
; CHECK-LABEL: t4_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu d0, d0
|
|
; CHECK-NEXT: ucvtf d0, d0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret double %conv1
|
|
}
|
|
|
|
define float @t5_strict(float %x) #0 {
|
|
; CHECK-LABEL: t5_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu s0, s0
|
|
; CHECK-NEXT: ucvtf s0, s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret float %conv1
|
|
}
|
|
|
|
define half @t6_strict(half %x) #0 {
|
|
; CHECK-LABEL: t6_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcvtzu h0, h0
|
|
; CHECK-NEXT: ucvtf h0, h0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret half %conv1
|
|
}
|
|
|
|
define bfloat @t7_strict(bfloat %x) #0 {
|
|
; CHECK-LABEL: t7_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
|
|
; CHECK-NEXT: mov w8, #32767 // =0x7fff
|
|
; CHECK-NEXT: shll v0.4s, v0.4h, #16
|
|
; CHECK-NEXT: fcvtzs w9, s0
|
|
; CHECK-NEXT: scvtf d0, w9
|
|
; CHECK-NEXT: fcvtxn s0, d0
|
|
; CHECK-NEXT: fmov w9, s0
|
|
; CHECK-NEXT: ubfx w10, w9, #16, #1
|
|
; CHECK-NEXT: add w8, w9, w8
|
|
; CHECK-NEXT: add w8, w10, w8
|
|
; CHECK-NEXT: lsr w8, w8, #16
|
|
; CHECK-NEXT: fmov s0, w8
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret bfloat %conv1
|
|
}
|
|
|
|
define bfloat @t8_strict(bfloat %x) #0 {
|
|
; CHECK-LABEL: t8_strict:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
|
|
; CHECK-NEXT: mov w8, #32767 // =0x7fff
|
|
; CHECK-NEXT: shll v0.4s, v0.4h, #16
|
|
; CHECK-NEXT: fcvtzu w9, s0
|
|
; CHECK-NEXT: ucvtf d0, w9
|
|
; CHECK-NEXT: fcvtxn s0, d0
|
|
; CHECK-NEXT: fmov w9, s0
|
|
; CHECK-NEXT: ubfx w10, w9, #16, #1
|
|
; CHECK-NEXT: add w8, w9, w8
|
|
; CHECK-NEXT: add w8, w10, w8
|
|
; CHECK-NEXT: lsr w8, w8, #16
|
|
; CHECK-NEXT: fmov s0, w8
|
|
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
|
|
%conv1 = call bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
|
ret bfloat %conv1
|
|
}
|
|
|
|
attributes #0 = { strictfp }
|
|
|
|
declare i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat, metadata)
|
|
declare i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat, metadata)
|
|
declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
|
|
declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
|
|
declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
|
|
declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
|
|
declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
|
|
declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
|
|
declare bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32, metadata, metadata)
|
|
declare bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32, metadata, metadata)
|
|
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
|
|
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
|
|
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
|
|
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
|
|
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
|
|
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
|