[libc][math] Refactor hypotf16 to Header Only (#180511).
Closes #175337. Part of #175336.
This commit is contained in:
parent
20605617ac
commit
8ccc40ee0e
@ -76,6 +76,7 @@
|
||||
#include "math/fsqrtf128.h"
|
||||
#include "math/fsqrtl.h"
|
||||
#include "math/hypotf.h"
|
||||
#include "math/hypotf16.h"
|
||||
#include "math/ilogb.h"
|
||||
#include "math/ilogbf.h"
|
||||
#include "math/ilogbf128.h"
|
||||
|
||||
31
libc/shared/math/hypotf16.h
Normal file
31
libc/shared/math/hypotf16.h
Normal file
@ -0,0 +1,31 @@
|
||||
//===-- Shared hypotf16 function --------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SHARED_MATH_HYPOTF16_H
|
||||
#define LLVM_LIBC_SHARED_MATH_HYPOTF16_H
|
||||
|
||||
#include "include/llvm-libc-macros/float16-macros.h"
|
||||
#include "shared/libc_common.h"
|
||||
|
||||
#ifdef LIBC_TYPES_HAS_FLOAT16
|
||||
|
||||
#include "src/__support/math/hypotf16.h"
|
||||
|
||||
// Re-export the implementation from the math namespace so that it can be
// referred to as LIBC_NAMESPACE::shared::hypotf16.
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
namespace shared {
|
||||
|
||||
using math::hypotf16;
|
||||
|
||||
} // namespace shared
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
#endif // LIBC_TYPES_HAS_FLOAT16
|
||||
|
||||
#endif // LLVM_LIBC_SHARED_MATH_HYPOTF16_H
|
||||
@ -1164,6 +1164,21 @@ add_header_library(
|
||||
libc.src.__support.macros.optimization
|
||||
)
|
||||
|
||||
# Header-only library target for the hypotf16 implementation in hypotf16.h.
add_header_library(
|
||||
hypotf16
|
||||
HDRS
|
||||
hypotf16.h
|
||||
DEPENDS
|
||||
libc.src.__support.FPUtil.fenv_impl
|
||||
libc.src.__support.FPUtil.fp_bits
|
||||
libc.src.__support.FPUtil.cast
|
||||
libc.src.__support.FPUtil.multiply_add
|
||||
libc.src.__support.FPUtil.sqrt
|
||||
libc.src.__support.macros.optimization
|
||||
libc.src.__support.macros.properties.types
|
||||
libc.include.llvm-libc-macros.float16_macros
|
||||
)
|
||||
|
||||
add_header_library(
|
||||
ilogbl
|
||||
HDRS
|
||||
|
||||
99
libc/src/__support/math/hypotf16.h
Normal file
99
libc/src/__support/math/hypotf16.h
Normal file
@ -0,0 +1,99 @@
|
||||
//===-- Implementation header for hypotf16 ----------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF16_H
|
||||
#define LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF16_H
|
||||
|
||||
#include "src/__support/FPUtil/FEnvImpl.h"
|
||||
#include "src/__support/FPUtil/FPBits.h"
|
||||
#include "src/__support/FPUtil/cast.h"
|
||||
#include "src/__support/FPUtil/multiply_add.h"
|
||||
#include "src/__support/FPUtil/sqrt.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/__support/macros/optimization.h"
|
||||
#include "src/__support/macros/properties/types.h"
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
namespace math {
|
||||
|
||||
// Computes sqrt(x^2 + y^2) for float16 inputs and returns it as float16.
//
// The inputs are reduced to their magnitudes and ordered so that a is the
// larger one. Inf/NaN inputs are handled up front. Otherwise the computation
// is carried out in float (exact squares, fused multiply-add, float sqrt) and
// the float result is narrowed to float16, with a correction step for results
// whose low bits make the narrowing ambiguous.
//
// Floating-point exceptions touched here: FE_INVALID is raised for signaling
// NaN inputs, and FE_INEXACT is cleared when the corrected result is found to
// be exact (both via the *_if_required helpers).
//
// For targets where conversion from float to float16 has to be
|
||||
// emulated, fputil::hypot<float16> is faster
|
||||
LIBC_INLINE float16 hypotf16(float16 x, float16 y) {
|
||||
using FloatBits = fputil::FPBits<float>;
|
||||
using FPBits = fputil::FPBits<float16>;
|
||||
|
||||
// Only the magnitudes matter, so drop the signs first.
FPBits x_abs = FPBits(x).abs();
|
||||
FPBits y_abs = FPBits(y).abs();
|
||||
|
||||
// Order the operands by their raw bit patterns: a gets the larger pattern and
// b the smaller one (ties pick x as a).
bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
|
||||
|
||||
FPBits a_bits = x_abs_larger ? x_abs : y_abs;
|
||||
FPBits b_bits = x_abs_larger ? y_abs : x_abs;
|
||||
|
||||
uint16_t a_u = a_bits.uintval();
|
||||
uint16_t b_u = b_bits.uintval();
|
||||
|
||||
// Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
|
||||
// generates extra exponent bit masking instructions on x86-64.
|
||||
if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
|
||||
// x or y is inf or nan
|
||||
// A signaling NaN in either operand raises FE_INVALID and yields a quiet NaN.
if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
|
||||
fputil::raise_except_if_required(FE_INVALID);
|
||||
return FPBits::quiet_nan().get_val();
|
||||
}
|
||||
// An infinity in either operand yields +inf; this check runs before the
// fallthrough return below, so it also covers the case where the other
// operand is a quiet NaN.
if (a_bits.is_inf() || b_bits.is_inf())
|
||||
return FPBits::inf().get_val();
|
||||
// Remaining case: a is a NaN and neither operand is signaling or infinite.
// The value returned is the sign-stripped a.
return a_bits.get_val();
|
||||
}
|
||||
|
||||
// Widen both magnitudes to float for the intermediate arithmetic.
float af = fputil::cast<float>(a_bits.get_val());
|
||||
float bf = fputil::cast<float>(b_bits.get_val());
|
||||
|
||||
// Large-gap shortcut: when the bit patterns of a and b differ by at least
// (FRACTION_LEN + 2) exponent steps, return a + b computed in float and
// narrowed to float16. Presumably b is then too small to affect anything but
// the rounding of the result - NOTE(review): confirm against the algorithm
// derivation.
// Compiler runtime basic operations for float16 might not be correctly
|
||||
// rounded for all rounding modes.
|
||||
if (LIBC_UNLIKELY(a_u - b_u >=
|
||||
static_cast<uint16_t>((FPBits::FRACTION_LEN + 2)
|
||||
<< FPBits::FRACTION_LEN)))
|
||||
return fputil::cast<float16>(af + bf);
|
||||
|
||||
// These squares are exact.
|
||||
float a_sq = af * af;
|
||||
float sum_sq = fputil::multiply_add(bf, bf, a_sq);
|
||||
|
||||
// Square root taken in float; r_u is its raw bit pattern.
FloatBits result(fputil::sqrt<float>(sum_sq));
|
||||
uint32_t r_u = result.uintval();
|
||||
|
||||
// The check below is true only when the low 12 bits of r_u are 0x000 or
// 0xFFF. In that case the float result is adjusted before narrowing:
//   - sum_sq_lo recovers the part of a^2 + b^2 lost when sum_sq was rounded;
//   - err is, algebraically, (a^2 + b^2) - r_d^2, i.e. the sign tells whether
//     the float square root is below or above the true value;
//   - err > 0 sets the lowest bit of r_u, err < 0 with that bit clear
//     decrements r_u, and otherwise, if the low 13 bits of r_u are all zero,
//     FE_INEXACT is cleared because the result is exact.
// Assuming IEEE binary32/binary16 layouts, those low 13 bits are the float
// fraction bits that do not fit in float16 - TODO confirm.
// If any of the sticky bits of the result are non-zero, except the LSB, then
|
||||
// the rounded result is correct.
|
||||
if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) {
|
||||
float r_d = result.get_val();
|
||||
|
||||
// Perform rounding correction.
|
||||
float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq);
|
||||
float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
|
||||
|
||||
if (err > 0) {
|
||||
r_u |= 1;
|
||||
} else if ((err < 0) && (r_u & 1) == 0) {
|
||||
r_u -= 1;
|
||||
} else if ((r_u & 0x0000'1FFF) == 0) {
|
||||
// The rounded result is exact.
|
||||
fputil::clear_except_if_required(FE_INEXACT);
|
||||
}
|
||||
// Narrow the (possibly adjusted) float bit pattern to float16.
return fputil::cast<float16>(FloatBits(r_u).get_val());
|
||||
}
|
||||
|
||||
// Common path: the float result can be narrowed to float16 directly.
return fputil::cast<float16>(result.get_val());
|
||||
}
|
||||
|
||||
} // namespace math
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF16_H
|
||||
@ -3125,13 +3125,7 @@ add_entrypoint_object(
|
||||
HDRS
|
||||
../hypotf16.h
|
||||
DEPENDS
|
||||
libc.src.__support.FPUtil.fenv_impl
|
||||
libc.src.__support.FPUtil.fp_bits
|
||||
libc.src.__support.FPUtil.cast
|
||||
libc.src.__support.FPUtil.multiply_add
|
||||
libc.src.__support.FPUtil.sqrt
|
||||
libc.src.__support.macros.optimization
|
||||
libc.src.__support.macros.properties.types
|
||||
libc.src.__support.math.hypotf16
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
||||
@ -7,85 +7,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/math/hypotf16.h"
|
||||
#include "src/__support/FPUtil/FEnvImpl.h"
|
||||
#include "src/__support/FPUtil/FPBits.h"
|
||||
#include "src/__support/FPUtil/cast.h"
|
||||
#include "src/__support/FPUtil/multiply_add.h"
|
||||
#include "src/__support/FPUtil/sqrt.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/__support/macros/optimization.h"
|
||||
#include "src/__support/macros/properties/types.h"
|
||||
#include "src/__support/math/hypotf16.h"
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
// For targets where conversion from float to float16 has to be
|
||||
// emulated, fputil::hypot<float16> is faster
|
||||
LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) {
|
||||
using FloatBits = fputil::FPBits<float>;
|
||||
using FPBits = fputil::FPBits<float16>;
|
||||
|
||||
FPBits x_abs = FPBits(x).abs();
|
||||
FPBits y_abs = FPBits(y).abs();
|
||||
|
||||
bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
|
||||
|
||||
FPBits a_bits = x_abs_larger ? x_abs : y_abs;
|
||||
FPBits b_bits = x_abs_larger ? y_abs : x_abs;
|
||||
|
||||
uint16_t a_u = a_bits.uintval();
|
||||
uint16_t b_u = b_bits.uintval();
|
||||
|
||||
// Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
|
||||
// generates extra exponent bit masking instructions on x86-64.
|
||||
if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
|
||||
// x or y is inf or nan
|
||||
if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
|
||||
fputil::raise_except_if_required(FE_INVALID);
|
||||
return FPBits::quiet_nan().get_val();
|
||||
}
|
||||
if (a_bits.is_inf() || b_bits.is_inf())
|
||||
return FPBits::inf().get_val();
|
||||
return a_bits.get_val();
|
||||
}
|
||||
|
||||
float af = fputil::cast<float>(a_bits.get_val());
|
||||
float bf = fputil::cast<float>(b_bits.get_val());
|
||||
|
||||
// Compiler runtime basic operations for float16 might not be correctly
|
||||
// rounded for all rounding modes.
|
||||
if (LIBC_UNLIKELY(a_u - b_u >=
|
||||
static_cast<uint16_t>((FPBits::FRACTION_LEN + 2)
|
||||
<< FPBits::FRACTION_LEN)))
|
||||
return fputil::cast<float16>(af + bf);
|
||||
|
||||
// These squares are exact.
|
||||
float a_sq = af * af;
|
||||
float sum_sq = fputil::multiply_add(bf, bf, a_sq);
|
||||
|
||||
FloatBits result(fputil::sqrt<float>(sum_sq));
|
||||
uint32_t r_u = result.uintval();
|
||||
|
||||
// If any of the sticky bits of the result are non-zero, except the LSB, then
|
||||
// the rounded result is correct.
|
||||
if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) {
|
||||
float r_d = result.get_val();
|
||||
|
||||
// Perform rounding correction.
|
||||
float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq);
|
||||
float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
|
||||
|
||||
if (err > 0) {
|
||||
r_u |= 1;
|
||||
} else if ((err < 0) && (r_u & 1) == 0) {
|
||||
r_u -= 1;
|
||||
} else if ((r_u & 0x0000'1FFF) == 0) {
|
||||
// The rounded result is exact.
|
||||
fputil::clear_except_if_required(FE_INEXACT);
|
||||
}
|
||||
return fputil::cast<float16>(FloatBits(r_u).get_val());
|
||||
}
|
||||
|
||||
return fputil::cast<float16>(result.get_val());
|
||||
return math::hypotf16(x, y);
|
||||
}
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@ -72,6 +72,7 @@ add_fp_unittest(
|
||||
libc.src.__support.math.fsqrtf128
|
||||
libc.src.__support.math.fsqrtl
|
||||
libc.src.__support.math.hypotf
|
||||
libc.src.__support.math.hypotf16
|
||||
libc.src.__support.math.ilogb
|
||||
libc.src.__support.math.ilogbf
|
||||
libc.src.__support.math.ilogbf16
|
||||
|
||||
@ -33,6 +33,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
|
||||
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp2m1f16(0.0f16));
|
||||
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
|
||||
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::expm1f16(0.0f16));
|
||||
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::hypotf16(0.0f16, 0.0f16));
|
||||
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::logf16(1.0f16));
|
||||
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::sinhf16(0.0f16));
|
||||
|
||||
|
||||
@ -3867,6 +3867,23 @@ libc_support_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Header-only support library exposing src/__support/math/hypotf16.h.
libc_support_library(
|
||||
name = "__support_math_hypotf16",
|
||||
hdrs = ["src/__support/math/hypotf16.h"],
|
||||
deps = [
|
||||
":__support_common",
|
||||
":__support_fputil_cast",
|
||||
":__support_fputil_fenv_impl",
|
||||
":__support_fputil_fp_bits",
|
||||
":__support_fputil_multiply_add",
|
||||
":__support_fputil_sqrt",
|
||||
":__support_macros_config",
|
||||
":__support_macros_optimization",
|
||||
":__support_macros_properties_types",
|
||||
":llvm_libc_macros_float16_macros",
|
||||
],
|
||||
)
|
||||
|
||||
libc_support_library(
|
||||
name = "__support_math_sinhf",
|
||||
hdrs = ["src/__support/math/sinhf.h"],
|
||||
@ -4906,8 +4923,7 @@ libc_math_function(
|
||||
# hypotf16 math function target, listing the extra support libraries it
# depends on.
libc_math_function(
|
||||
name = "hypotf16",
|
||||
additional_deps = [
|
||||
":__support_fputil_multiply_add",
|
||||
":__support_fputil_sqrt",
|
||||
":__support_math_hypotf16",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user