[APFloat] Properly implement DoubleAPFloat::roundToIntegral

The previous implementation round-tripped the value through the legacy
double-double semantics and therefore mishandled double-doubles such as
0x1p100 + 0x1p1: keeping the low-order component of such a value would
require more than a 106-bit significand.
This commit is contained in:
David Majnemer 2025-08-06 10:23:25 -07:00
parent 51e825dbfb
commit 0a23b22d1d
2 changed files with 595 additions and 65 deletions

View File

@ -4949,6 +4949,21 @@ DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
return *this; return *this;
} }
// Adds X and Y and splits the sum into a leading part and a remainder.
//
// The caller must guarantee log2(X) >= log2(Y). The returned pair {Hi, Lo}
// then satisfies:
//   - Hi + Lo == X + Y
//   - Hi == RTNE(Hi + Lo)
//   - abs(Lo) <= ulp(Hi)/2
//
// A non-finite X is passed through unchanged and paired with a positive zero.
static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
  if (X.isFinite()) {
    APFloat Sum = X + Y;
    // The portion of Y that was actually absorbed into Sum...
    APFloat Absorbed = Sum - X;
    // ...and whatever part of Y the addition rounded away.
    APFloat Residual = Y - Absorbed;
    return {Sum, Residual};
  }
  return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
}
// Implement addition, subtraction, multiplication and division based on: // Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations", // "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
@ -5218,10 +5233,78 @@ DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
// Rounds this double-double to an integral value according to RM, storing the
// result back into Floats[0] (high part) and Floats[1] (low part).
//
// Returns the opStatus of the component rounding that decided the result:
// the status of rounding Hi when the high part alone determines the answer
// (non-finite/zero Hi, zero Lo, or non-integral Hi), otherwise the status of
// rounding Lo.
APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); const APFloat &Hi = getFirst();
auto Ret = Tmp.roundToIntegral(RM); const APFloat &Lo = getSecond();
*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret; APFloat RoundedHi = Hi;
const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
// We can reduce the problem to just the high part if the input:
// 1. Represents a non-finite value.
// 2. Has a component which is zero.
if (!Hi.isFiniteNonZero() || Lo.isZero()) {
Floats[0] = std::move(RoundedHi);
Floats[1].makeZero(/*Neg=*/false);
return HiStatus;
}
// Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
// halfway point.
auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
APFloat TieBreaker) {
// RoundingError tells us which direction we rounded:
// - RoundingError > 0: we rounded up.
// - RoundingError < 0: we rounded down.
// Sterbenz' lemma ensures that RoundingError is exact.
const APFloat RoundingError = Rounded - ToRound;
// The correction below only fires when the value sat exactly on a tie
// (error magnitude 0.5) and the tie was broken against TieBreaker's sign; it
// then moves Rounded by one unit in TieBreaker's direction.
// NOTE(review): the adjustment is added with rmNearestTiesToEven, so when
// Rounded and the +/-1 cancel exactly the zero is presumably positive even
// for a negative input (e.g. ToRound = -0.5 with a positive TieBreaker) --
// confirm whether -0 is expected in that case.
if (TieBreaker.isNonZero() &&
TieBreaker.isNegative() != RoundingError.isNegative() &&
abs(RoundingError).isExactlyValue(0.5))
Rounded.add(
APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
rmNearestTiesToEven);
return Rounded;
};
// Case 1: Hi is not an integer.
// Special cases are for rounding modes that are sensitive to ties.
if (RoundedHi != Hi) {
// We need to consider the case where Hi was between two integers and the
// rounding mode broke the tie when, in fact, Lo may have had a different
// sign than Hi.
if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
Floats[0] = std::move(RoundedHi);
Floats[1].makeZero(/*Neg=*/false);
return HiStatus;
}
// Case 2: Hi is an integer.
// Special cases are for rounding modes which are rounding towards or away from zero.
RoundingMode LoRoundingMode;
if (RM == rmTowardZero)
// When our input is positive, we want the Lo component rounded toward
// negative infinity to get the smallest result magnitude. Likewise,
// negative inputs want the Lo component rounded toward positive infinity.
LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
else
LoRoundingMode = RM;
APFloat RoundedLo = Lo;
const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
// NOTE(review): unlike Case 1, only rmNearestTiesToAway is corrected here;
// rmNearestTiesToEven is left as rounded. Presumably a Lo tie implies an even
// Hi for canonical inputs -- confirm.
if (LoRoundingMode == rmNearestTiesToAway)
// We need to consider the case where Lo was between two integers and the
// rounding mode broke the tie when, in fact, Hi may have had a different
// sign than Lo.
RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
// We must ensure that the final result has no overlap between the two APFloat values.
std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
Floats[0] = std::move(RoundedHi);
Floats[1] = std::move(RoundedLo);
// Only the status of rounding Lo is reported; the statuses of the additions
// performed inside fastTwoSum are not inspected.
return LoStatus;
} }
void DoubleAPFloat::changeSign() { void DoubleAPFloat::changeSign() {

View File

@ -16,9 +16,11 @@
#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/FormatVariadic.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include <cmath> #include <cmath>
#include <limits>
#include <ostream> #include <ostream>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <type_traits>
using namespace llvm; using namespace llvm;
@ -2661,6 +2663,39 @@ TEST(APFloatTest, Float8UZConvert) {
} }
} }
// A double-double test value spelled as two plain doubles. The members are
// forwarded as the Hi and Lo arguments of makeDoubleAPFloat.
struct DD {
// First (Hi) argument handed to makeDoubleAPFloat.
double Hi;
// Second (Lo) argument handed to makeDoubleAPFloat.
double Lo;
};
// Builds a PPCDoubleDouble APFloat from separately specified Hi and Lo parts.
//
// Each part may be an APFloat (used as-is), a double (converted with the
// APFloat(double) constructor), or any other type accepted by the
// APFloat(IEEEdouble, value) constructor, e.g. a string literal.
template <typename T, typename U>
static APFloat makeDoubleAPFloat(T Hi, U Lo) {
  // Shared conversion for both parts; the branch is chosen at compile time
  // from the argument's type.
  const auto ToIEEEDouble = [](auto Part) -> APFloat {
    using PartT = decltype(Part);
    if constexpr (std::is_same_v<PartT, APFloat>)
      return Part;
    else if constexpr (std::is_same_v<PartT, double>)
      return APFloat{Part};
    else
      return APFloat{APFloat::IEEEdouble(), Part};
  };

  const APFloat HiFloat = ToIEEEDouble(Hi);
  const APFloat LoFloat = ToIEEEDouble(Lo);

  // Join the two bit patterns (Lo's bits concat'ed with Hi's bits) and
  // reinterpret the result with the PPCDoubleDouble semantics.
  const APInt Bits =
      LoFloat.bitcastToAPInt().concat(HiFloat.bitcastToAPInt());
  return APFloat(APFloat::PPCDoubleDouble(), Bits);
}
// Convenience overload: unpacks a DD and forwards its two doubles to the
// two-argument makeDoubleAPFloat.
static APFloat makeDoubleAPFloat(DD X) {
  const double Hi = X.Hi;
  const double Lo = X.Lo;
  return makeDoubleAPFloat(Hi, Lo);
}
TEST(APFloatTest, PPCDoubleDouble) { TEST(APFloatTest, PPCDoubleDouble) {
APFloat test(APFloat::PPCDoubleDouble(), "1.0"); APFloat test(APFloat::PPCDoubleDouble(), "1.0");
EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
@ -5315,18 +5350,452 @@ TEST(APFloatTest, PPCDoubleDoubleFMA) {
APFloat(APFloat::PPCDoubleDouble(), "10").compare(A)); APFloat(APFloat::PPCDoubleDouble(), "10").compare(A));
} }
// One entry of the roundToIntegral test table: an input double-double plus
// the expected result for each rounding mode. `Rounded` is indexed by the
// underlying integer value of the APFloat::roundingMode.
TEST(APFloatTest, PPCDoubleDoubleRoundToIntegral) { struct PPCDoubleDoubleRoundToIntegralTestCase {
{ DD Input;
APFloat A(APFloat::PPCDoubleDouble(), "1.5"); DD Rounded[5] = {};
// Records R as the expected result for rounding mode RM and returns *this so
// that calls can be chained. RM's underlying value is used directly as the
// index into Rounded without a range check.
A.roundToIntegral(APFloat::rmNearestTiesToEven); constexpr PPCDoubleDoubleRoundToIntegralTestCase &
EXPECT_EQ(APFloat::cmpEqual, withRounded(DD R, APFloat::roundingMode RM) {
APFloat(APFloat::PPCDoubleDouble(), "2").compare(A)); Rounded[static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM)] = R;
return *this;
} }
{ };
APFloat A(APFloat::PPCDoubleDouble(), "2.5");
// Returns the table of roundToIntegral test cases: each entry names an input
// double-double and the expected result under each of the five rounding
// modes. The table is passed to ::testing::ValuesIn when the parameterized
// test suite is instantiated.
A.roundToIntegral(APFloat::rmNearestTiesToEven); auto ppcDoubleDoubleRoundToIntegralTests() {
// Eps is the machine epsilon of double; HalfEps and QuarterEps are Eps/2 and
// Eps/4 (referred to as 2^-53 and 2^-54 in the section 6 comments below).
EXPECT_EQ(APFloat::cmpEqual, constexpr double Eps = std::numeric_limits<double>::epsilon();
APFloat(APFloat::PPCDoubleDouble(), "2").compare(A)); constexpr double HalfEps = Eps / 2.0;
constexpr double QuarterEps = Eps / 4.0;
// Smallest positive normal double; used as a "tiny" low component.
constexpr double SmallestNormal = std::numeric_limits<double>::min();
// 1 << digits of double (2^53 per the section 7 heading below).
constexpr double EvenIntegerThreshold{uint64_t{1}
<< std::numeric_limits<double>::digits};
constexpr double Inf = std::numeric_limits<double>::infinity();
constexpr double QNaN = std::numeric_limits<double>::quiet_NaN();
using TestCase = PPCDoubleDoubleRoundToIntegralTestCase;
static constexpr auto TestCases = std::array{
// 1. Zeros and Basic Integers
// Input: Positive Zero (0.0, 0.0)
TestCase({{0.0, 0.0}})
.withRounded({0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({0.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({0.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({0.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Negative Zero (-0.0, 0.0)
TestCase({{-0.0, 0.0}})
.withRounded({-0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-0.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-0.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Positive Even (2.0, 0.0)
TestCase({{2.0, 0.0}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Positive Odd (3.0, 0.0)
TestCase({{3.0, 0.0}})
.withRounded({3.0, 0.0}, APFloat::rmTowardZero)
.withRounded({3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Negative Even (-2.0, 0.0)
TestCase({{-2.0, 0.0}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// 2. General Fractions (Non-Ties)
// Input: 2.3
TestCase({{2.3, 0.0}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: 2.7
TestCase({{2.7, 0.0}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -2.3
TestCase({{-2.3, 0.0}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -2.7
TestCase({{-2.7, 0.0}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: 2.3 + Tiny
TestCase({{2.3, SmallestNormal}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// 3. Exact Midpoints (Ties at N.5)
// Input: 0.5
TestCase({{0.5, 0.0}})
.withRounded({0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({0.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({1.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: 1.5 (Odd base)
TestCase({{1.5, 0.0}})
.withRounded({1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: 2.5 (Even base)
TestCase({{2.5, 0.0}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -0.5
TestCase({{-0.5, 0.0}})
.withRounded({-0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-0.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-1.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -1.5 (Odd base)
TestCase({{-1.5, 0.0}})
.withRounded({-1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-1.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -2.5 (Even base)
TestCase({{-2.5, 0.0}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// 4. Near Midpoints (lo breaks the tie)
// Input: Slightly > 2.5
TestCase({{2.5, SmallestNormal}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({3.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < 2.5
TestCase({{2.5, -SmallestNormal}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly > 1.5
TestCase({{1.5, SmallestNormal}})
.withRounded({1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < 1.5
TestCase({{1.5, -SmallestNormal}})
.withRounded({1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly > -2.5 (closer to 0)
TestCase({{-2.5, SmallestNormal}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < -2.5 (further from 0)
TestCase({{-2.5, -SmallestNormal}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-3.0, 0.0}, APFloat::rmNearestTiesToEven),
// 5. Near Integers (lo crosses the integer boundary)
// Input: Slightly > 2.0
TestCase({{2.0, SmallestNormal}})
.withRounded({2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({3.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < 2.0 (1.99...)
TestCase({{2.0, -SmallestNormal}})
.withRounded({1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly > -2.0 (-1.99...)
TestCase({{-2.0, SmallestNormal}})
.withRounded({-1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-2.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-1.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < -2.0
TestCase({{-2.0, -SmallestNormal}})
.withRounded({-2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-3.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-2.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly > 0.0
TestCase({{SmallestNormal, 0.0}})
.withRounded({0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({0.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({1.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({0.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({0.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: Slightly < 0.0
TestCase({{-SmallestNormal, 0.0}})
.withRounded({-0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({-1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({-0.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-0.0, 0.0}, APFloat::rmNearestTiesToEven),
// 6. Boundary of Canonicalization (Maximum lo)
// Input: 1.0 + Max lo (1 + 2^-53)
TestCase({{1.0, HalfEps}})
.withRounded({1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({1.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({2.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven),
// Input: 1.0 - Max lo (1 - 2^-54)
TestCase({{1.0, -QuarterEps}})
.withRounded({0.0, 0.0}, APFloat::rmTowardZero)
.withRounded({0.0, 0.0}, APFloat::rmTowardNegative)
.withRounded({1.0, 0.0}, APFloat::rmTowardPositive)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({1.0, 0.0}, APFloat::rmNearestTiesToEven),
// 7. Large Magnitudes (Beyond 2^53). N = EvenIntegerThreshold (Even)
// Input: EvenIntegerThreshold (Exact)
TestCase({{EvenIntegerThreshold, 0.0}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+1 (Exact)
TestCase({{EvenIntegerThreshold, 1.0}})
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToEven),
// Fractions
// Input: EvenIntegerThreshold+0.25
TestCase({{EvenIntegerThreshold, 0.25}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+0.75
TestCase({{EvenIntegerThreshold, 0.75}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToEven),
// Ties (Midpoints)
// Input: EvenIntegerThreshold-0.5
TestCase({{EvenIntegerThreshold, -0.5}})
.withRounded({EvenIntegerThreshold - 1.0, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold - 1.0, 0.0},
APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+0.5
TestCase({{EvenIntegerThreshold, 0.5}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+1.5
TestCase({{EvenIntegerThreshold + 2.0, -0.5}})
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold + 2.0, 0.0},
APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold + 2.0, 0.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold + 2.0, 0.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+2.5
TestCase({{EvenIntegerThreshold + 2.0, 0.5}})
.withRounded({EvenIntegerThreshold + 2.0, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold + 2.0, 0.0},
APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold + 4.0, -1.0},
APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold + 4.0, -1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold + 2.0, 0.0},
APFloat::rmNearestTiesToEven),
// Near Ties
// Input: EvenIntegerThreshold+0.5+HalfEps
TestCase({{EvenIntegerThreshold, 0.5 + HalfEps}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToEven),
// Input: EvenIntegerThreshold+0.5-QuarterEps
TestCase({{EvenIntegerThreshold, 0.5 - QuarterEps}})
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 0.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 0.0},
APFloat::rmNearestTiesToEven),
// Canonical Boundary (Max lo for EvenIntegerThreshold is 1.0)
// NOTE(review): this entry has the same input and expectations as the
// "EvenIntegerThreshold+1 (Exact)" entry earlier in this table.
// Input: EvenIntegerThreshold+1.0
TestCase({{EvenIntegerThreshold, 1.0}})
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardZero)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardNegative)
.withRounded({EvenIntegerThreshold, 1.0}, APFloat::rmTowardPositive)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToAway)
.withRounded({EvenIntegerThreshold, 1.0},
APFloat::rmNearestTiesToEven),
// 8. Special Values
// Input: +Inf
TestCase({{Inf, 0.0}})
.withRounded({Inf, 0.0}, APFloat::rmTowardZero)
.withRounded({Inf, 0.0}, APFloat::rmTowardNegative)
.withRounded({Inf, 0.0}, APFloat::rmTowardPositive)
.withRounded({Inf, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({Inf, 0.0}, APFloat::rmNearestTiesToEven),
// Input: -Inf
TestCase({{-Inf, 0.0}})
.withRounded({-Inf, 0.0}, APFloat::rmTowardZero)
.withRounded({-Inf, 0.0}, APFloat::rmTowardNegative)
.withRounded({-Inf, 0.0}, APFloat::rmTowardPositive)
.withRounded({-Inf, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({-Inf, 0.0}, APFloat::rmNearestTiesToEven),
// Input: NaN input hi. Expected output canonical (NaN, 0.0).
TestCase({{QNaN, 0.0}})
.withRounded({QNaN, 0.0}, APFloat::rmTowardZero)
.withRounded({QNaN, 0.0}, APFloat::rmTowardNegative)
.withRounded({QNaN, 0.0}, APFloat::rmTowardPositive)
.withRounded({QNaN, 0.0}, APFloat::rmNearestTiesToAway)
.withRounded({QNaN, 0.0}, APFloat::rmNearestTiesToEven),
};
return TestCases;
}
class PPCDoubleDoubleRoundToIntegralValueTest
: public testing::Test,
public ::testing::WithParamInterface<
PPCDoubleDoubleRoundToIntegralTestCase> {};
// Instantiates the PPCDoubleDoubleRoundToIntegralValueTest suite with the
// entries returned by ppcDoubleDoubleRoundToIntegralTests(), supplied through
// ::testing::ValuesIn.
INSTANTIATE_TEST_SUITE_P(
PPCDoubleDoubleRoundToIntegralValueParamTests,
PPCDoubleDoubleRoundToIntegralValueTest,
::testing::ValuesIn(ppcDoubleDoubleRoundToIntegralTests()));
// For one table entry, rounds the input under every rounding mode and checks
// the result against the expectation recorded for that mode.
//
// The loop index doubles as the rounding mode: Rounded[I] holds the expected
// value for static_cast<APFloat::roundingMode>(I).
TEST_P(PPCDoubleDoubleRoundToIntegralValueTest,
       PPCDoubleDoubleRoundToIntegral) {
  const PPCDoubleDoubleRoundToIntegralTestCase TestCase = GetParam();
  const APFloat Input = makeDoubleAPFloat(TestCase.Input);
  EXPECT_FALSE(Input.isDenormal())
      << TestCase.Input.Hi << " + " << TestCase.Input.Lo;

  for (size_t I = 0, E = std::size(TestCase.Rounded); I != E; ++I) {
    const auto RM = static_cast<APFloat::roundingMode>(I);
    const APFloat Expected = makeDoubleAPFloat(TestCase.Rounded[I]);
    // Report the expected value's own components (previously the Lo printed
    // here was the input's, which made the diagnostic misleading).
    EXPECT_FALSE(Expected.isDenormal())
        << TestCase.Rounded[I].Hi << " + " << TestCase.Rounded[I].Lo;

    APFloat Actual = Input;
    Actual.roundToIntegral(RM);

    // Branch on the *expected* value: NaN never compares equal, so a NaN
    // expectation can only be checked with isNaN(). Branching on Actual
    // instead would let an unexpected NaN result pass vacuously; with this
    // form such a result reaches compare() and fails as cmpUnordered.
    if (Expected.isNaN()) {
      EXPECT_TRUE(Actual.isNaN())
          << "RM: " << RM << " Input.Hi: " << TestCase.Input.Hi
          << " Input.Lo: " << TestCase.Input.Lo << " Actual: " << Actual;
    } else {
      EXPECT_EQ(Actual.compare(Expected), APFloat::cmpEqual)
          << "RM: " << RM << " Input.Hi: " << TestCase.Input.Hi
          << " Input.Lo: " << TestCase.Input.Lo << " Actual: " << Actual
          << " Expected.Hi: " << TestCase.Rounded[I].Hi
          << " Expected.Lo: " << TestCase.Rounded[I].Lo
          << " Expected: " << Expected;
    }
  }
}
@ -5551,13 +6020,9 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
return X; return X;
}; };
auto Zero = [] { auto Zero = [] { return APFloat::getZero(APFloat::IEEEdouble()); };
return APFloat::getZero(APFloat::IEEEdouble());
};
auto One = [] { auto One = [] { return APFloat::getOne(APFloat::IEEEdouble()); };
return APFloat::getOne(APFloat::IEEEdouble());
};
// 0x1p-1074 // 0x1p-1074
auto MinSubnormal = [] { auto MinSubnormal = [] {
@ -5574,24 +6039,6 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
// 2^-53 // 2^-53
auto EpsNeg = [&] { return scalbn(Eps(), -1, APFloat::rmNearestTiesToEven); }; auto EpsNeg = [&] { return scalbn(Eps(), -1, APFloat::rmNearestTiesToEven); };
auto MakeDoubleAPFloat = [](auto Hi, auto Lo) {
APFloat HiFloat{APFloat::IEEEdouble(), APFloat::uninitialized};
if constexpr (std::is_same_v<decltype(Hi), APFloat>) {
HiFloat = Hi;
} else {
HiFloat = {APFloat::IEEEdouble(), Hi};
}
APFloat LoFloat{APFloat::IEEEdouble(), APFloat::uninitialized};
if constexpr (std::is_same_v<decltype(Lo), APFloat>) {
LoFloat = Lo;
} else {
LoFloat = {APFloat::IEEEdouble(), Lo};
}
APInt Bits = LoFloat.bitcastToAPInt().concat(HiFloat.bitcastToAPInt());
return APFloat(APFloat::PPCDoubleDouble(), Bits);
};
APFloat Test(APFloat::PPCDoubleDouble(), APFloat::uninitialized); APFloat Test(APFloat::PPCDoubleDouble(), APFloat::uninitialized);
APFloat Expected(APFloat::PPCDoubleDouble(), APFloat::uninitialized); APFloat Expected(APFloat::PPCDoubleDouble(), APFloat::uninitialized);
@ -5719,55 +6166,55 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
// 2b. |hi| >= 2*DBL_MIN_NORMAL (DD precision > D precision) // 2b. |hi| >= 2*DBL_MIN_NORMAL (DD precision > D precision)
// Test at hi = 1.0, lo = 0. // Test at hi = 1.0, lo = 0.
Test = MakeDoubleAPFloat(One(), Zero()); Test = makeDoubleAPFloat(One(), Zero());
Expected = MakeDoubleAPFloat(One(), MinSubnormal()); Expected = makeDoubleAPFloat(One(), MinSubnormal());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
// Test at hi = -1.0. delta = 2^-1074 (positive, moving towards +Inf). // Test at hi = -1.0. delta = 2^-1074 (positive, moving towards +Inf).
Test = MakeDoubleAPFloat(-One(), Zero()); Test = makeDoubleAPFloat(-One(), Zero());
Expected = MakeDoubleAPFloat(-One(), MinSubnormal()); Expected = makeDoubleAPFloat(-One(), MinSubnormal());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
// Testing the boundary where calculated delta equals DBL_TRUE_MIN. // Testing the boundary where calculated delta equals DBL_TRUE_MIN.
// Requires ilogb(hi) = E = -968. // Requires ilogb(hi) = E = -968.
// delta = 2^(-968 - 106) = 2^-1074 = DBL_TRUE_MIN. // delta = 2^(-968 - 106) = 2^-1074 = DBL_TRUE_MIN.
Test = MakeDoubleAPFloat("0x1p-968", Zero()); Test = makeDoubleAPFloat("0x1p-968", Zero());
Expected = MakeDoubleAPFloat("0x1p-968", MinSubnormal()); Expected = makeDoubleAPFloat("0x1p-968", MinSubnormal());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
// Testing below the boundary (E < -968). Delta clamps to DBL_TRUE_MIN. // Testing below the boundary (E < -968). Delta clamps to DBL_TRUE_MIN.
Test = MakeDoubleAPFloat("0x1p-969", Zero()); Test = makeDoubleAPFloat("0x1p-969", Zero());
Expected = MakeDoubleAPFloat("0x1p-969", MinSubnormal()); Expected = makeDoubleAPFloat("0x1p-969", MinSubnormal());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
// 3. Standard Increment (No rollover) // 3. Standard Increment (No rollover)
// hi=1.0, lo=2^-1074. // hi=1.0, lo=2^-1074.
Test = MakeDoubleAPFloat(One(), MinSubnormal()); Test = makeDoubleAPFloat(One(), MinSubnormal());
Expected = MakeDoubleAPFloat(One(), NextUp(MinSubnormal())); Expected = makeDoubleAPFloat(One(), NextUp(MinSubnormal()));
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
// Incrementing negative lo. // Incrementing negative lo.
Test = MakeDoubleAPFloat(One(), -MinSubnormal()); Test = makeDoubleAPFloat(One(), -MinSubnormal());
Expected = MakeDoubleAPFloat(One(), Zero()); Expected = makeDoubleAPFloat(One(), Zero());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual);
// Crossing lo=0. // Crossing lo=0.
Test = MakeDoubleAPFloat(One(), -MinSubnormal()); Test = makeDoubleAPFloat(One(), -MinSubnormal());
Expected = MakeDoubleAPFloat(One(), Zero()); Expected = makeDoubleAPFloat(One(), Zero());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual);
// 4. Rollover Cases around 1.0 (Positive hi) // 4. Rollover Cases around 1.0 (Positive hi)
// hi=1.0, lo=nextDown(2^-53). // hi=1.0, lo=nextDown(2^-53).
Test = MakeDoubleAPFloat(One(), NextDown(EpsNeg())); Test = makeDoubleAPFloat(One(), NextDown(EpsNeg()));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = MakeDoubleAPFloat(One(), EpsNeg()); Expected = makeDoubleAPFloat(One(), EpsNeg());
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
@ -5778,17 +6225,17 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
// Can't naively TwoSum(0x1p+0, nextUp(0x1p-53)): // Can't naively TwoSum(0x1p+0, nextUp(0x1p-53)):
// It gives {nextUp(0x1p+0), nextUp(nextUp(-0x1p-53))} but the next // It gives {nextUp(0x1p+0), nextUp(nextUp(-0x1p-53))} but the next
// number should be {nextUp(0x1p+0), nextUp(-0x1p-53)}. // number should be {nextUp(0x1p+0), nextUp(-0x1p-53)}.
Test = MakeDoubleAPFloat(One(), EpsNeg()); Test = makeDoubleAPFloat(One(), EpsNeg());
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = MakeDoubleAPFloat(NextUp(One()), NextUp(-EpsNeg())); Expected = makeDoubleAPFloat(NextUp(One()), NextUp(-EpsNeg()));
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
// hi = nextDown(1), lo = nextDown(0x1p-54) // hi = nextDown(1), lo = nextDown(0x1p-54)
Test = MakeDoubleAPFloat(NextDown(One()), NextDown(APFloat(0x1p-54))); Test = makeDoubleAPFloat(NextDown(One()), NextDown(APFloat(0x1p-54)));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = MakeDoubleAPFloat(One(), APFloat(-0x1p-54)); Expected = makeDoubleAPFloat(One(), APFloat(-0x1p-54));
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
@ -5796,26 +6243,26 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
// 5. Negative Rollover (Moving towards Zero / +Inf) // 5. Negative Rollover (Moving towards Zero / +Inf)
// hi = -1, lo = nextDown(0x1p-54) // hi = -1, lo = nextDown(0x1p-54)
Test = MakeDoubleAPFloat(APFloat(-1.0), NextDown(APFloat(0x1p-54))); Test = makeDoubleAPFloat(APFloat(-1.0), NextDown(APFloat(0x1p-54)));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); Expected = makeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54));
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
// hi = -1, lo = 0x1p-54 // hi = -1, lo = 0x1p-54
Test = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); Test = makeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = Expected =
MakeDoubleAPFloat(NextUp(APFloat(-1.0)), NextUp(APFloat(-0x1p-54))); makeDoubleAPFloat(NextUp(APFloat(-1.0)), NextUp(APFloat(-0x1p-54)));
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
// 6. Rollover across Power of 2 boundary (Exponent change) // 6. Rollover across Power of 2 boundary (Exponent change)
Test = MakeDoubleAPFloat(NextDown(APFloat(2.0)), NextDown(EpsNeg())); Test = makeDoubleAPFloat(NextDown(APFloat(2.0)), NextDown(EpsNeg()));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());
Expected = MakeDoubleAPFloat(APFloat(2.0), -EpsNeg()); Expected = makeDoubleAPFloat(APFloat(2.0), -EpsNeg());
EXPECT_EQ(Test.next(false), APFloat::opOK); EXPECT_EQ(Test.next(false), APFloat::opOK);
EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
EXPECT_FALSE(Test.isDenormal()); EXPECT_FALSE(Test.isDenormal());