[libc][math] Fix log1p SEGV with large inputs when FTZ/DAZ flags are set. (#115541)

This commit is contained in:
lntue 2024-11-09 06:38:12 -08:00 committed by GitHub
parent 6beaa123a2
commit 1d41543c95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 15 deletions

View File

@ -822,8 +822,8 @@ constexpr Float128 BIG_COEFFS[4]{
{Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128},
}; };
LIBC_INLINE double log1p_accurate(int e_x, int index, [[maybe_unused]] LIBC_INLINE double log1p_accurate(int e_x, int index,
fputil::DoubleDouble m_x) { fputil::DoubleDouble m_x) {
Float128 e_x_f128(static_cast<float>(e_x)); Float128 e_x_f128(static_cast<float>(e_x));
Float128 sum = fputil::quick_mul(LOG_2, e_x_f128); Float128 sum = fputil::quick_mul(LOG_2, e_x_f128);
sum = fputil::quick_add(sum, LOG_R1[index]); sum = fputil::quick_add(sum, LOG_R1[index]);
@ -882,7 +882,6 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
constexpr int EXP_BIAS = FPBits_t::EXP_BIAS; constexpr int EXP_BIAS = FPBits_t::EXP_BIAS;
constexpr int FRACTION_LEN = FPBits_t::FRACTION_LEN; constexpr int FRACTION_LEN = FPBits_t::FRACTION_LEN;
constexpr uint64_t FRACTION_MASK = FPBits_t::FRACTION_MASK;
FPBits_t xbits(x); FPBits_t xbits(x);
uint64_t x_u = xbits.uintval(); uint64_t x_u = xbits.uintval();
@ -954,12 +953,12 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
// |x_dd.lo| < ulp(x_dd.hi) // |x_dd.lo| < ulp(x_dd.hi)
FPBits_t xhi_bits(x_dd.hi); FPBits_t xhi_bits(x_dd.hi);
uint64_t xhi_frac = xhi_bits.get_mantissa();
x_u = xhi_bits.uintval(); x_u = xhi_bits.uintval();
// Range reduction: // Range reduction:
// Find k such that |x_hi - k * 2^-7| <= 2^-8. // Find k such that |x_hi - k * 2^-7| <= 2^-8.
int idx = int idx = static_cast<int>((xhi_frac + (1ULL << (FRACTION_LEN - 8))) >>
static_cast<int>(((x_u & FRACTION_MASK) + (1ULL << (FRACTION_LEN - 8))) >> (FRACTION_LEN - 7));
(FRACTION_LEN - 7));
int x_e = xhi_bits.get_exponent() + (idx >> 7); int x_e = xhi_bits.get_exponent() + (idx >> 7);
double e_x = static_cast<double>(x_e); double e_x = static_cast<double>(x_e);
@ -974,17 +973,21 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
constexpr double ERR_HI[2] = {0x1.0p-85, 0.0}; constexpr double ERR_HI[2] = {0x1.0p-85, 0.0};
double err_hi = ERR_HI[hi == 0.0]; double err_hi = ERR_HI[hi == 0.0];
// Scaling factor = 2^(-xhi_bits.get_exponent()) // Scale x_dd by 2^(-xhi_bits.get_exponent()).
uint64_t s_u = (static_cast<uint64_t>(EXP_BIAS) << (FRACTION_LEN + 1)) - int64_t s_u = static_cast<int64_t>(x_u & FPBits_t::EXP_MASK) -
(x_u & FPBits_t::EXP_MASK); (static_cast<int64_t>(EXP_BIAS) << FRACTION_LEN);
// When the exponent of x is 1023, the scaling factor 2^(-1023) is subnormal.
const double EXPONENT_CORRECTION[2] = {0.0, 0x1.0p-1023};
double scaling = FPBits_t(s_u).get_val() + EXPONENT_CORRECTION[s_u == 0];
// Normalize arguments: // Normalize arguments:
// 1 <= m_dd.hi < 2 // 1 <= m_dd.hi < 2
// |m_dd.lo| < 2^-52. // |m_dd.lo| < 2^-52.
// This is exact. // This is exact.
fputil::DoubleDouble m_dd{scaling * x_dd.lo, scaling * x_dd.hi}; uint64_t m_hi = FPBits_t::one().uintval() | xhi_frac;
uint64_t m_lo =
FPBits_t(x_dd.lo).abs().get_val() > x_dd.hi * 0x1.0p-127
? static_cast<uint64_t>(cpp::bit_cast<int64_t>(x_dd.lo) - s_u)
: 0;
fputil::DoubleDouble m_dd{FPBits_t(m_lo).get_val(), FPBits_t(m_hi).get_val()};
// Perform range reduction: // Perform range reduction:
// r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1 // r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1

View File

@ -13,8 +13,6 @@
#include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h" #include "test/UnitTest/Test.h"
#include <stdint.h>
using LlvmLibcLog1pTest = LIBC_NAMESPACE::testing::FPTest<double>; using LlvmLibcLog1pTest = LIBC_NAMESPACE::testing::FPTest<double>;
TEST_F(LlvmLibcLog1pTest, SpecialNumbers) { TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
@ -26,6 +24,9 @@ TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::log1p(-0.0)); EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::log1p(-0.0));
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1p(-1.0), EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1p(-1.0),
FE_DIVBYZERO); FE_DIVBYZERO);
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
} }
#ifdef LIBC_TEST_FTZ_DAZ #ifdef LIBC_TEST_FTZ_DAZ
@ -36,18 +37,24 @@ TEST_F(LlvmLibcLog1pTest, FTZMode) {
ModifyMXCSR mxcsr(FTZ); ModifyMXCSR mxcsr(FTZ);
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
} }
TEST_F(LlvmLibcLog1pTest, DAZMode) { TEST_F(LlvmLibcLog1pTest, DAZMode) {
ModifyMXCSR mxcsr(DAZ); ModifyMXCSR mxcsr(DAZ);
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
} }
TEST_F(LlvmLibcLog1pTest, FTZDAZMode) { TEST_F(LlvmLibcLog1pTest, FTZDAZMode) {
ModifyMXCSR mxcsr(FTZ | DAZ); ModifyMXCSR mxcsr(FTZ | DAZ);
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
} }
#endif #endif