[SelectionDAG] Add STRICT_BF16_TO_FP
and STRICT_FP_TO_BF16
(#80056)
This patch adds support for the `STRICT_BF16_TO_FP` and `STRICT_FP_TO_BF16` SelectionDAG nodes.
This commit is contained in:
parent
2c5d01c2cf
commit
8300f30a92
@ -190,6 +190,7 @@ set(GENERIC_SOURCES
|
||||
|
||||
# We only build BF16 files when "__bf16" is available.
|
||||
set(BF16_SOURCES
|
||||
extendbfsf2.c
|
||||
truncdfbf2.c
|
||||
truncsfbf2.c
|
||||
)
|
||||
|
13
compiler-rt/lib/builtins/extendbfsf2.c
Normal file
13
compiler-rt/lib/builtins/extendbfsf2.c
Normal file
@ -0,0 +1,13 @@
|
||||
//===-- lib/extendbfsf2.c - bfloat -> single conversion -----------*- C -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define SRC_BFLOAT16
|
||||
#define DST_SINGLE
|
||||
#include "fp_extend_impl.inc"
|
||||
|
||||
// Converts a bfloat16 value to single precision by forwarding to
// __extendXfYf2__, which is configured by the SRC_BFLOAT16 / DST_SINGLE
// macros defined before fp_extend_impl.inc is included.
COMPILER_RT_ABI float __extendbfsf2(src_t a) {
  return __extendXfYf2__(a);
}
|
@ -81,6 +81,21 @@ static inline int src_rep_t_clz_impl(src_rep_t a) {
|
||||
|
||||
#define src_rep_t_clz src_rep_t_clz_impl
|
||||
|
||||
#elif defined SRC_BFLOAT16
|
||||
#ifdef COMPILER_RT_HAS_BFLOAT16
|
||||
typedef __bf16 src_t;
|
||||
#else
|
||||
typedef uint16_t src_t;
|
||||
#endif
|
||||
typedef uint16_t src_rep_t;
|
||||
#define SRC_REP_C UINT16_C
|
||||
static const int srcBits = sizeof(src_t) * CHAR_BIT;
|
||||
static const int srcSigFracBits = 7;
|
||||
// -1 accounts for the sign bit.
|
||||
// srcBits - srcSigFracBits - 1
|
||||
static const int srcExpBits = 8;
|
||||
#define src_rep_t_clz __builtin_clz
|
||||
|
||||
#else
|
||||
#error Source should be half, single, or double precision!
|
||||
#endif // end source precision
|
||||
|
@ -921,6 +921,8 @@ enum NodeType {
|
||||
/// has native conversions.
|
||||
BF16_TO_FP,
|
||||
FP_TO_BF16,
|
||||
STRICT_BF16_TO_FP,
|
||||
STRICT_FP_TO_BF16,
|
||||
|
||||
/// Perform various unary floating-point operations inspired by libm. For
|
||||
/// FPOWI, the result is undefined if the integer operand doesn't fit into
|
||||
|
@ -698,6 +698,8 @@ public:
|
||||
return false;
|
||||
case ISD::STRICT_FP16_TO_FP:
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
case ISD::STRICT_BF16_TO_FP:
|
||||
case ISD::STRICT_FP_TO_BF16:
|
||||
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
|
||||
case ISD::STRICT_##DAGN:
|
||||
#include "llvm/IR/ConstrainedOps.def"
|
||||
|
@ -304,6 +304,7 @@ HANDLE_LIBCALL(FEGETMODE, "fegetmode")
|
||||
HANDLE_LIBCALL(FESETMODE, "fesetmode")
|
||||
|
||||
// Conversion
|
||||
HANDLE_LIBCALL(FPEXT_BF16_F32, "__extendbfsf2")
|
||||
HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
|
||||
HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq")
|
||||
HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
|
||||
|
@ -541,6 +541,8 @@ def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
|
||||
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
|
||||
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
|
||||
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
|
||||
def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
|
||||
def fp_to_bf16 : SDNode<"ISD::FP_TO_BF16" , SDTFPToIntOp>;
|
||||
|
||||
def strict_fadd : SDNode<"ISD::STRICT_FADD",
|
||||
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
|
||||
@ -620,6 +622,11 @@ def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP",
|
||||
def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16",
|
||||
SDTFPToIntOp, [SDNPHasChain]>;
|
||||
|
||||
def strict_bf16_to_fp : SDNode<"ISD::STRICT_BF16_TO_FP",
|
||||
SDTIntToFPOp, [SDNPHasChain]>;
|
||||
def strict_fp_to_bf16 : SDNode<"ISD::STRICT_FP_TO_BF16",
|
||||
SDTFPToIntOp, [SDNPHasChain]>;
|
||||
|
||||
def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>;
|
||||
def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>;
|
||||
|
||||
@ -1591,6 +1598,12 @@ def any_f16_to_fp : PatFrags<(ops node:$src),
|
||||
def any_fp_to_f16 : PatFrags<(ops node:$src),
|
||||
[(fp_to_f16 node:$src),
|
||||
(strict_fp_to_f16 node:$src)]>;
|
||||
def any_bf16_to_fp : PatFrags<(ops node:$src),
|
||||
[(bf16_to_fp node:$src),
|
||||
(strict_bf16_to_fp node:$src)]>;
|
||||
def any_fp_to_bf16 : PatFrags<(ops node:$src),
|
||||
[(fp_to_bf16 node:$src),
|
||||
(strict_fp_to_bf16 node:$src)]>;
|
||||
|
||||
multiclass binary_atomic_op_ord {
|
||||
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
|
||||
|
@ -1047,6 +1047,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
Node->getOperand(0).getValueType());
|
||||
break;
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
case ISD::STRICT_FP_TO_BF16:
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::STRICT_LRINT:
|
||||
@ -3645,14 +3646,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
|
||||
}
|
||||
break;
|
||||
case ISD::STRICT_BF16_TO_FP:
|
||||
case ISD::STRICT_FP16_TO_FP:
|
||||
if (Node->getValueType(0) != MVT::f32) {
|
||||
// We can extend to types bigger than f32 in two steps without changing
|
||||
// the result. Since "f16 -> f32" is much more commonly available, give
|
||||
// CodeGen the option of emitting that before resorting to a libcall.
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
|
||||
{Node->getOperand(0), Node->getOperand(1)});
|
||||
SDValue Res = DAG.getNode(Node->getOpcode(), dl, {MVT::f32, MVT::Other},
|
||||
{Node->getOperand(0), Node->getOperand(1)});
|
||||
Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
|
||||
{Node->getValueType(0), MVT::Other},
|
||||
{Res.getValue(1), Res});
|
||||
@ -4651,6 +4652,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
|
||||
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
|
||||
}
|
||||
break;
|
||||
case ISD::STRICT_BF16_TO_FP:
|
||||
if (Node->getValueType(0) == MVT::f32) {
|
||||
TargetLowering::MakeLibCallOptions CallOptions;
|
||||
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
|
||||
DAG, RTLIB::FPEXT_BF16_F32, MVT::f32, Node->getOperand(1),
|
||||
CallOptions, SDLoc(Node), Node->getOperand(0));
|
||||
Results.push_back(Tmp.first);
|
||||
Results.push_back(Tmp.second);
|
||||
}
|
||||
break;
|
||||
case ISD::STRICT_FP16_TO_FP: {
|
||||
if (Node->getValueType(0) == MVT::f32) {
|
||||
TargetLowering::MakeLibCallOptions CallOptions;
|
||||
@ -4792,12 +4803,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
|
||||
break;
|
||||
}
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::STRICT_FP_TO_FP16: {
|
||||
RTLIB::Libcall LC =
|
||||
Node->getOpcode() == ISD::STRICT_FP_TO_FP16
|
||||
? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
|
||||
: RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
|
||||
Node->getValueType(0));
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
case ISD::STRICT_FP_TO_BF16: {
|
||||
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
|
||||
if (Node->getOpcode() == ISD::STRICT_FP_TO_FP16)
|
||||
LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
|
||||
else if (Node->getOpcode() == ISD::STRICT_FP_TO_BF16)
|
||||
LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::bf16);
|
||||
else
|
||||
LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
|
||||
Node->getValueType(0));
|
||||
|
||||
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
|
||||
|
||||
TargetLowering::MakeLibCallOptions CallOptions;
|
||||
|
@ -918,6 +918,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
|
||||
case ISD::FP_TO_BF16:
|
||||
case ISD::STRICT_FP_TO_BF16:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
|
||||
case ISD::STRICT_FP_TO_SINT:
|
||||
@ -970,6 +971,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
|
||||
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
|
||||
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
|
||||
N->getOpcode() == ISD::FP_TO_BF16 ||
|
||||
N->getOpcode() == ISD::STRICT_FP_TO_BF16 ||
|
||||
N->getOpcode() == ISD::STRICT_FP_ROUND);
|
||||
|
||||
bool IsStrict = N->isStrictFPOpcode();
|
||||
@ -980,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
|
||||
if (N->getOpcode() == ISD::FP_TO_FP16 ||
|
||||
N->getOpcode() == ISD::STRICT_FP_TO_FP16)
|
||||
FloatRVT = MVT::f16;
|
||||
else if (N->getOpcode() == ISD::FP_TO_BF16)
|
||||
else if (N->getOpcode() == ISD::FP_TO_BF16 ||
|
||||
N->getOpcode() == ISD::STRICT_FP_TO_BF16)
|
||||
FloatRVT = MVT::bf16;
|
||||
|
||||
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
|
||||
@ -2193,13 +2196,11 @@ static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
|
||||
if (RetVT == MVT::f16)
|
||||
return ISD::STRICT_FP_TO_FP16;
|
||||
|
||||
if (OpVT == MVT::bf16) {
|
||||
// TODO: return ISD::STRICT_BF16_TO_FP;
|
||||
}
|
||||
if (OpVT == MVT::bf16)
|
||||
return ISD::STRICT_BF16_TO_FP;
|
||||
|
||||
if (RetVT == MVT::bf16) {
|
||||
// TODO: return ISD::STRICT_FP_TO_BF16;
|
||||
}
|
||||
if (RetVT == MVT::bf16)
|
||||
return ISD::STRICT_FP_TO_BF16;
|
||||
|
||||
report_fatal_error("Attempt at an invalid promotion-related conversion");
|
||||
}
|
||||
@ -2999,10 +3000,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
|
||||
EVT SVT = N->getOperand(0).getValueType();
|
||||
|
||||
if (N->isStrictFPOpcode()) {
|
||||
assert(RVT == MVT::f16);
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
|
||||
{N->getOperand(0), N->getOperand(1)});
|
||||
// FIXME: assume we only have two f16 variants for now.
|
||||
unsigned Opcode;
|
||||
if (RVT == MVT::f16)
|
||||
Opcode = ISD::STRICT_FP_TO_FP16;
|
||||
else if (RVT == MVT::bf16)
|
||||
Opcode = ISD::STRICT_FP_TO_BF16;
|
||||
else
|
||||
llvm_unreachable("unknown half type");
|
||||
SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other},
|
||||
{N->getOperand(0), N->getOperand(1)});
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
@ -3192,10 +3199,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
|
||||
Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
|
||||
|
||||
if (IsStrict) {
|
||||
assert(SVT == MVT::f16);
|
||||
unsigned Opcode;
|
||||
if (SVT == MVT::f16)
|
||||
Opcode = ISD::STRICT_FP16_TO_FP;
|
||||
else if (SVT == MVT::bf16)
|
||||
Opcode = ISD::STRICT_BF16_TO_FP;
|
||||
else
|
||||
llvm_unreachable("unknown half type");
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
|
||||
{N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
|
||||
DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other},
|
||||
{N->getOperand(0), Op});
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
ReplaceValueWith(SDValue(N, 0), Res);
|
||||
return SDValue();
|
||||
|
@ -165,6 +165,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::FP_TO_FP16:
|
||||
Res = PromoteIntRes_FP_TO_FP16_BF16(N);
|
||||
break;
|
||||
case ISD::STRICT_FP_TO_BF16:
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N);
|
||||
break;
|
||||
|
@ -380,7 +380,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::FP_TO_FP16: return "fp_to_fp16";
|
||||
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
|
||||
case ISD::BF16_TO_FP: return "bf16_to_fp";
|
||||
case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
|
||||
case ISD::FP_TO_BF16: return "fp_to_bf16";
|
||||
case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
|
||||
case ISD::LROUND: return "lround";
|
||||
case ISD::STRICT_LROUND: return "strict_lround";
|
||||
case ISD::LLROUND: return "llround";
|
||||
|
@ -307,6 +307,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
|
||||
} else if (OpVT == MVT::f80) {
|
||||
if (RetVT == MVT::f128)
|
||||
return FPEXT_F80_F128;
|
||||
} else if (OpVT == MVT::bf16) {
|
||||
if (RetVT == MVT::f32)
|
||||
return FPEXT_BF16_F32;
|
||||
}
|
||||
|
||||
return UNKNOWN_LIBCALL;
|
||||
|
@ -406,6 +406,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(Op, MVT::f128, Expand);
|
||||
}
|
||||
|
||||
for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
|
||||
setOperationAction(ISD::STRICT_FP_TO_BF16, VT, Expand);
|
||||
setOperationAction(ISD::STRICT_BF16_TO_FP, VT, Expand);
|
||||
}
|
||||
|
||||
for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
|
||||
|
181
llvm/test/CodeGen/X86/bfloat-constrained.ll
Normal file
181
llvm/test/CodeGen/X86/bfloat-constrained.ll
Normal file
@ -0,0 +1,181 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=X86
|
||||
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=X64
|
||||
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefixes=X64
|
||||
|
||||
@a = global bfloat 0xR0000, align 2
|
||||
@b = global bfloat 0xR0000, align 2
|
||||
@c = global bfloat 0xR0000, align 2
|
||||
|
||||
define float @bfloat_to_float() strictfp {
|
||||
; X86-LABEL: bfloat_to_float:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: movzwl a, %eax
|
||||
; X86-NEXT: movl %eax, (%esp)
|
||||
; X86-NEXT: calll __extendbfsf2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: bfloat_to_float:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: movq a@GOTPCREL(%rip), %rax
|
||||
; X64-NEXT: movzwl (%rax), %edi
|
||||
; X64-NEXT: callq __extendbfsf2@PLT
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
%1 = load bfloat, ptr @a, align 2
|
||||
%2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
|
||||
ret float %2
|
||||
}
|
||||
|
||||
define double @bfloat_to_double() strictfp {
|
||||
; X86-LABEL: bfloat_to_double:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: movzwl a, %eax
|
||||
; X86-NEXT: movl %eax, (%esp)
|
||||
; X86-NEXT: calll __extendbfsf2
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: bfloat_to_double:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: movq a@GOTPCREL(%rip), %rax
|
||||
; X64-NEXT: movzwl (%rax), %edi
|
||||
; X64-NEXT: callq __extendbfsf2@PLT
|
||||
; X64-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
%1 = load bfloat, ptr @a, align 2
|
||||
%2 = tail call double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
|
||||
ret double %2
|
||||
}
|
||||
|
||||
define void @float_to_bfloat(float %0) strictfp {
|
||||
; X86-LABEL: float_to_bfloat:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fstps (%esp)
|
||||
; X86-NEXT: wait
|
||||
; X86-NEXT: calll __truncsfbf2
|
||||
; X86-NEXT: movw %ax, a
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: float_to_bfloat:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: callq __truncsfbf2@PLT
|
||||
; X64-NEXT: movq a@GOTPCREL(%rip), %rcx
|
||||
; X64-NEXT: movw %ax, (%rcx)
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
%2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
||||
store bfloat %2, ptr @a, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @double_to_bfloat(double %0) strictfp {
|
||||
; X86-LABEL: double_to_bfloat:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fstpl (%esp)
|
||||
; X86-NEXT: wait
|
||||
; X86-NEXT: calll __truncdfbf2
|
||||
; X86-NEXT: movw %ax, a
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: double_to_bfloat:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: callq __truncdfbf2@PLT
|
||||
; X64-NEXT: movq a@GOTPCREL(%rip), %rcx
|
||||
; X64-NEXT: movw %ax, (%rcx)
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
%2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
||||
store bfloat %2, ptr @a, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @add() strictfp {
|
||||
; X86-LABEL: add:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: movzwl a, %eax
|
||||
; X86-NEXT: movl %eax, (%esp)
|
||||
; X86-NEXT: calll __extendbfsf2
|
||||
; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: wait
|
||||
; X86-NEXT: movzwl b, %eax
|
||||
; X86-NEXT: movl %eax, (%esp)
|
||||
; X86-NEXT: calll __extendbfsf2
|
||||
; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
||||
; X86-NEXT: faddp %st, %st(1)
|
||||
; X86-NEXT: fstps (%esp)
|
||||
; X86-NEXT: wait
|
||||
; X86-NEXT: calll __truncsfbf2
|
||||
; X86-NEXT: movw %ax, c
|
||||
; X86-NEXT: addl $12, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: add:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: movq a@GOTPCREL(%rip), %rax
|
||||
; X64-NEXT: movzwl (%rax), %edi
|
||||
; X64-NEXT: callq __extendbfsf2@PLT
|
||||
; X64-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movq b@GOTPCREL(%rip), %rax
|
||||
; X64-NEXT: movzwl (%rax), %edi
|
||||
; X64-NEXT: callq __extendbfsf2@PLT
|
||||
; X64-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
|
||||
; X64-NEXT: callq __truncsfbf2@PLT
|
||||
; X64-NEXT: movq c@GOTPCREL(%rip), %rcx
|
||||
; X64-NEXT: movw %ax, (%rcx)
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
%1 = load bfloat, ptr @a, align 2
|
||||
%2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
|
||||
%3 = load bfloat, ptr @b, align 2
|
||||
%4 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %3, metadata !"fpexcept.strict") #0
|
||||
%5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
||||
%6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
|
||||
store bfloat %6, ptr @c, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat, metadata)
|
||||
declare double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat, metadata)
|
||||
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
|
||||
declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float, metadata, metadata)
|
||||
declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double, metadata, metadata)
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
Loading…
x
Reference in New Issue
Block a user