[compiler-rt][ARM] Double-precision FP support functions (#179920)
This commit adds C helper functions `dnan2`, `dnorm2` and `dunder` for handling the less critical edge cases of double-precision arithmetic, similar to `fnan2`, `fnorm2` and `funder` that were added in commit f7e652127772e93. It also adds a header file that defines some register aliases for handling double-precision numbers in AArch32 software floating point in an endianness-independent way, by providing aliases `xh` and `xl` for the high and low words of the first double-precision function argument, regardless of which of them is in r0 and which in r1, and similarly `yh` and `yl` for the second argument in r2/r3.
This commit is contained in:
parent
3aeea10371
commit
80831832e0
@ -476,6 +476,9 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
|
||||
arm/fnan2.c
|
||||
arm/fnorm2.c
|
||||
arm/funder.c
|
||||
arm/dnan2.c
|
||||
arm/dnorm2.c
|
||||
arm/dunder.c
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
44
compiler-rt/lib/builtins/arm/dnan2.c
Normal file
44
compiler-rt/lib/builtins/arm/dnan2.c
Normal file
@ -0,0 +1,44 @@
|
||||
//===-- dnan2.c - Handle double-precision NaN inputs to binary operation --===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This helper function is available for use by double-precision float
|
||||
// arithmetic implementations to handle propagating NaNs from the input
|
||||
// operands to the output, in a way that matches Arm hardware FP.
|
||||
//
|
||||
// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
|
||||
// least one of them must be a NaN. The return value is the correct output NaN.
|
||||
//
|
||||
// A signalling NaN in the input (with bit 51 clear) takes priority over any
|
||||
// quiet NaN, and is adjusted on return by setting bit 51 to make it quiet. If
|
||||
// both inputs are the same type of NaN then the first input takes priority:
|
||||
// the input a is used instead of b.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Choose the NaN to return from a binary operation whose operands a and b
// (IEEE 754 double-precision bit patterns) include at least one NaN.
//
// A signalling NaN beats a quiet NaN and is returned with its quiet bit set;
// between two NaNs of the same kind, a beats b. If neither operand is a
// signalling NaN and a is not a quiet NaN, b is returned unchanged.
uint64_t __compiler_rt_dnan2(uint64_t a, uint64_t b) {
  // The quiet bit is bit 51 of the encoding; once the sign bit has been
  // shifted out it sits one place higher.
  const uint64_t quiet_bit = 0x0008000000000000;
  const uint64_t shifted_quiet_bit = quiet_bit << 1; // 0x0010000000000000
  const uint64_t snan_threshold = 0xfff0000000000000;

  // Build a classification key for each operand: drop the sign by shifting
  // left, then add 1 at the shifted quiet-bit position. Signalling NaNs
  // (maximum exponent, quiet bit clear, nonzero mantissa) end up strictly
  // above snan_threshold. Quiet NaNs overflow the 64-bit word and wrap round
  // to values strictly below shifted_quiet_bit. Infinity maps to exactly
  // snan_threshold, and every other non-NaN value lands in between, so
  // neither comparison below fires for them.
  const uint64_t key_a = (a << 1) + shifted_quiet_bit;
  const uint64_t key_b = (b << 1) + shifted_quiet_bit;

  const int a_is_signalling = key_a > snan_threshold;
  const int b_is_signalling = key_b > snan_threshold;
  const int a_is_quiet = key_a < shifted_quiet_bit;

  if (a_is_signalling || b_is_signalling) {
    // Signalling NaNs take priority, a before b; quieten the one we return.
    const uint64_t chosen = a_is_signalling ? a : b;
    return chosen | quiet_bit;
  }

  // No signalling NaN: prefer a if it is a quiet NaN, otherwise b is
  // expected to be the quiet NaN.
  return a_is_quiet ? a : b;
}
|
||||
59
compiler-rt/lib/builtins/arm/dnorm2.c
Normal file
59
compiler-rt/lib/builtins/arm/dnorm2.c
Normal file
@ -0,0 +1,59 @@
|
||||
//===-- dnorm2.c - Handle double-precision denormal inputs to binary op ---===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This helper function is available for use by double-precision float
|
||||
// arithmetic implementations, to handle denormal inputs on entry by
|
||||
// renormalizing the mantissa and modifying the exponent to match.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Structure containing the function's inputs and outputs.
//
// On entry: a, b are two input floating-point numbers, still in IEEE 754
// encoding. expa and expb are the 11-bit exponents of those numbers, extracted
// and shifted down to the low 11 bits of the word, with no other change.
// Neither value should be zero, or have the maximum exponent (indicating an
// infinity or NaN).
//
// On exit: each of a and b contains the mantissa of the input value, with the
// leading 1 bit made explicit, and shifted up to bit 52 (the same place it
// would have been if the number was normalized already). If expa was zero
// (indicating that a was denormal) then it is now represented as a normalized
// number with an out-of-range exponent (zero or negative). The same applies to
// expb and b.
//
// The sign bits from the input floating-point numbers are discarded
// completely. The caller is expected to have stored those somewhere
// safe already.
struct dnorm2 {
  uint64_t a, b;
  uint32_t expa, expb;
};

// Normalize one operand in place: *value is reduced to its mantissa with the
// leading 1 at bit 52, and *exponent is adjusted if the input was denormal.
static void dnorm2_normalize_one(uint64_t *value, uint32_t *exponent) {
  // Keep only the 52 stored mantissa bits; sign and exponent fields go.
  uint64_t mantissa = *value & ~0xFFF0000000000000ull;

  if (*exponent == 0) {
    // Denormal input: there is no implicit leading 1, so shift the highest
    // set mantissa bit up to bit 52. A 64-bit word has 11 bits above bit 52,
    // hence the "- 11". The mantissa must be nonzero here (the caller has
    // excluded zero inputs): __builtin_clzll(0) has an undefined result.
    unsigned shift = __builtin_clzll(mantissa) - 11;
    mantissa <<= shift;
    // A denormal is scaled like exponent 1, so after shifting left by
    // 'shift' the matching exponent is 1 - shift. This is zero or negative,
    // stored wrapped round in the unsigned field.
    *exponent = 1 - shift;
  } else {
    // Normalized input: just make the implicit leading 1 explicit.
    mantissa |= 0x0010000000000000ull;
  }

  *value = mantissa;
}

// Renormalize both operands described by *values; see the comment on
// struct dnorm2 for the exact entry and exit conditions.
void __compiler_rt_dnorm2(struct dnorm2 *values) {
  dnorm2_normalize_one(&values->a, &values->expa);
  dnorm2_normalize_one(&values->b, &values->expb);
}
|
||||
78
compiler-rt/lib/builtins/arm/dunder.c
Normal file
78
compiler-rt/lib/builtins/arm/dunder.c
Normal file
@ -0,0 +1,78 @@
|
||||
//===-- dunder.c - Handle double-precision floating-point underflow -------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This helper function is available for use by double-precision float
|
||||
// arithmetic implementations to handle underflowed output values, if they were
|
||||
// computed in the form of a normalized mantissa and an out-of-range exponent.
|
||||
//
|
||||
// On input: x should be a complete IEEE 754 floating-point value representing
|
||||
// the desired output scaled up by 2^1536 (the same value that would have been
|
||||
// passed to an underflow trap handler in IEEE 754:1985).
|
||||
//
|
||||
// This isn't enough information to re-round to the correct output denormal
|
||||
// without also knowing whether x itself has already been rounded, and which
|
||||
// way. 'errsign' gives this information, by indicating the sign of the value
|
||||
// (true result - x). That is, if errsign > 0 it means the true value was
|
||||
// larger (x was rounded down); if errsign < 0 then x was rounded up; if
|
||||
// errsign == 0 then x represents the _exact_ desired output value.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define SIGNBIT 0x8000000000000000ull
#define MANTSIZE 52
#define BIAS 0x600

// Convert an underflowed intermediate result into the correctly rounded
// denormal (or zero) output.
//
// x is an IEEE 754 double-precision bit pattern whose exponent field is
// BIAS (0x600) larger than that of the desired result. errsign describes
// the rounding already applied to x, read as a two's-complement 32-bit
// value: top bit set means x was rounded up (the true value is smaller),
// any other nonzero value means x was rounded down, and zero means x is
// exact.
//
// Returns the bit pattern of the result, rounded to nearest with ties to
// even, carrying the sign of x.
uint64_t __compiler_rt_dunder(uint64_t x, uint32_t errsign) {
  uint64_t sign = x & SIGNBIT;
  // Shift the sign bit off the top, then bring the 11-bit exponent field
  // down to the bottom of the word.
  uint64_t exponent = (x << 1) >> 53;

  // Rule out exponents so small (or large!) that no denormalisation
  // is needed.
  if (exponent > BIAS) {
    // Exponent 0x601 or above means a normalised number got here by
    // mistake, so we just remove the 0x600 exponent bias and go
    // straight home.
    return x - ((uint64_t)BIAS << MANTSIZE);
  }
  // exponent <= BIAS here, so this is in the range 1..BIAS+1 and fits.
  uint32_t bits_lost = BIAS + 1 - exponent;
  if (bits_lost > MANTSIZE + 1) {
    // The implicit leading 1 of the intermediate value's mantissa is
    // below the lowest mantissa bit of a denormal by at least 2 bits.
    // Round down to 0 unconditionally.
    return sign;
  }

  // Make the full mantissa (with leading bit) at the top of the word. The
  // shift discards the sign and all but the lowest exponent bit; the OR
  // then forces bit 63 to be the explicit leading 1.
  uint64_t mantissa = SIGNBIT | (x << 11);

  // Nudge the mantissa by one unit in its lowest bit, in the direction of
  // the true value, so the re-rounding below sees on which side of x the
  // true value lies. The sign is tested explicitly rather than by
  // combining (errsign >> 31) with (-errsign >> 31): for errsign ==
  // 0x80000000 the negation yields 0x80000000 again, so those two terms
  // would cancel and a negative error would be treated as exact.
  if (errsign >> 31) {
    mantissa -= 1; // x was rounded up: true value is slightly smaller
  } else if (errsign != 0) {
    mantissa += 1; // x was rounded down: true value is slightly larger
  }

  // Shift down to the output position, keeping the bits shifted off.
  uint64_t outmant, shifted_off;
  if (bits_lost == MANTSIZE + 1) {
    // Special case for the exponent where we have to shift the whole
    // of 'mantissa' off the bottom of the word (a shift by 64 would be
    // undefined in C).
    outmant = 0;
    shifted_off = mantissa;
  } else {
    outmant = mantissa >> (11 + bits_lost);
    shifted_off = mantissa << (64 - (11 + bits_lost));
  }

  // Re-round: the top bit of shifted_off is the "half" bit.
  if (shifted_off >> 63) {
    outmant++;
    if (!(shifted_off << 1)) {
      outmant &= ~1; // halfway case: round to even
    }
  }

  return sign | outmant;
}
|
||||
39
compiler-rt/lib/builtins/arm/endian.h
Normal file
39
compiler-rt/lib/builtins/arm/endian.h
Normal file
@ -0,0 +1,39 @@
|
||||
//===-- endian.h - make double-prec software FP work in both endiannesses -===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This header file should be included from assembly source code (not C). It
|
||||
// defines two pairs of register aliases, for handling 64-bit values passed and
|
||||
// returned from functions in the AArch32 integer registers:
|
||||
//
|
||||
// xh, xl the high and low words of a 64-bit value passed in {r0,r1}
|
||||
// yh, yl the high and low words of a 64-bit value passed in {r2,r3}
|
||||
//
|
||||
// Which alias goes with which register depends on endianness.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef COMPILER_RT_ARM_FP_ENDIAN_H
|
||||
#define COMPILER_RT_ARM_FP_ENDIAN_H
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Pick the alias-to-register mapping from the target byte order. In both
// branches x names the {r0,r1} pair and y names the {r2,r3} pair; the only
// thing that changes is which register of each pair is called "high" (h)
// and which "low" (l).
// NOTE(review): this tests only __BIG_ENDIAN__; presumably every supported
// toolchain predefines it for big-endian AArch32 targets. Confirm.
#ifdef __BIG_ENDIAN__
|
||||
// Big-endian: high words are in lower-numbered registers.
|
||||
xh .req r0
|
||||
xl .req r1
|
||||
yh .req r2
|
||||
yl .req r3
|
||||
#else
|
||||
// Little-endian: low words are in lower-numbered registers.
|
||||
xl .req r0
|
||||
xh .req r1
|
||||
yl .req r2
|
||||
yh .req r3
|
||||
#endif
|
||||
|
||||
#endif // COMPILER_RT_ARM_FP_ENDIAN_H
|
||||
Loading…
x
Reference in New Issue
Block a user