[libclc] Reduce bithacking in CLC frexp (#129871)

Also replace some magic constants with named ones.

Checking against FP zero and using the isnan and isinf functions allows the
optimizer to create one unified @llvm.is.fpclass intrinsic. This results
in fewer, more canonical IR instructions.
This commit is contained in:
Fraser Cormack 2025-03-05 14:18:51 +00:00 committed by GitHub
parent adb5d6aeae
commit 760eeac6a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 22 deletions

View File

@ -23,6 +23,8 @@
#include <clc/clc_convert.h>
#include <clc/internal/clc.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isinf.h>
#include <clc/relational/clc_isnan.h>
#include <clc/relational/clc_select.h>
#include <clc/utils.h>

View File

@ -28,17 +28,22 @@
// Single-precision frexp: splits x into a fraction (the return value) and a
// power-of-two exponent stored through ep. The fraction keeps the sign and
// mantissa bits of x and gets the exponent field of 0.5 (0x3f000000), so its
// magnitude lies in [0.5, 1). For zero, infinity and NaN inputs the code
// selects x itself as the result and 0 as the exponent.
//
// NOTE(review): this listing is a diff hunk whose +/- markers were lost, so
// the removed and the added statements are interleaved (`ai` and `e` are each
// declared twice, and there are two return statements). Given the commit
// message, the lines using `d`, `t` and literal masks appear to be the removed
// side and the lines using `is_subnormal`, `is_inf_nan_or_zero` and the
// *_SP32 names the added side - confirm against the repository.
// NOTE(review): comments below assume __clc_select(a, b, c) yields b where c
// is set and a otherwise, as OpenCL select does - confirm.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
// Reinterpret the float bits as an integer of the same width.
__CLC_INTN i = __CLC_AS_INTN(x);
// [apparently removed side] Clear the sign bit to get the bits of |x|.
__CLC_INTN ai = i & 0x7fffffff;
// [apparently removed side] Subnormal test: non-zero magnitude whose bits are
// below 0x00800000, i.e. the exponent field is zero.
__CLC_INTN d = ai > 0 & ai < 0x00800000;
/* scale subnormal by 2^26 without multiplying */
// [apparently added side] Same sign-bit mask, now via a named constant.
__CLC_INTN ai = i & EXSIGNBIT_SP32;
// Scale subnormal by 2^26 without multiplying
__CLC_INTN is_subnormal = ai > 0 && ai < 0x00800000;
// 0x0d800000 is the bit pattern of 2^-100 (exponent field 27). OR-ing the
// subnormal mantissa m into it gives 2^-100 + m * 2^-123; subtracting 2^-100
// leaves m * 2^-123, which is the subnormal value m * 2^-149 times 2^26.
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f;
// [apparently removed side] Use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_INTN(s), d);
// [apparently removed side] Biased exponent field minus 126 (so the fraction
// ends up in [0.5, 1)), minus a further 26 to undo the subnormal rescaling.
__CLC_INTN e =
(ai >> 23) - 126 - __clc_select((__CLC_INTN)0, (__CLC_INTN)26, d);
// [apparently removed side] Special inputs detected from the bits: ai == 0 is
// +/-0, and e == 129 corresponds to exponent field 255 (infinity or NaN).
__CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129;
// [apparently removed side] Rebuild the fraction: sign of x, exponent of 0.5,
// mantissa of the (possibly rescaled) magnitude.
i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff);
*ep = __clc_select(e, (__CLC_INTN)0, t);
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
// [apparently added side] The same steps as above, expressed with
// is_subnormal and the named *_SP32 constants.
ai = __clc_select(ai, __CLC_AS_INTN(s), is_subnormal);
__CLC_INTN e = (ai >> EXPSHIFTBITS_SP32) - 126 -
__clc_select((__CLC_INTN)0, (__CLC_INTN)26, is_subnormal);
i = (i & (__CLC_INTN)SIGNBIT_SP32) | (__CLC_INTN)HALFEXPBITS_SP32 |
(ai & (__CLC_INTN)MANTBITS_SP32);
// Special inputs are now classified on the floating-point value instead of on
// its bits; per the commit message this lets the optimizer form a single
// @llvm.is.fpclass test.
__CLC_INTN is_inf_nan_or_zero =
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
// Special inputs: exponent 0 and x returned unchanged.
*ep = __clc_select(e, (__CLC_INTN)0, is_inf_nan_or_zero);
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
}
#endif
@ -56,19 +61,22 @@ __clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
// Double-precision frexp: splits x into a fraction (the return value) and a
// power-of-two exponent stored through ep. The fraction keeps the sign and
// mantissa bits of x and gets the exponent field of 0.5
// (0x3fe0000000000000), so its magnitude lies in [0.5, 1). For zero, infinity
// and NaN inputs the code selects x itself as the result and 0 as the
// exponent.
//
// NOTE(review): this listing is a diff hunk whose +/- markers were lost, so
// the removed and the added statements are interleaved (`ai` and `e` are each
// declared twice, and there are two return statements). Given the commit
// message, the lines using `d`, `t` and literal masks appear to be the removed
// side and the lines using `is_subnormal`, `is_inf_nan_or_zero` and the
// *_DP64 names the added side - confirm against the repository.
// NOTE(review): comments below assume __clc_select(a, b, c) yields b where c
// is set and a otherwise, as OpenCL select does - confirm.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
// Reinterpret the double bits as a 64-bit integer.
__CLC_LONGN i = __CLC_AS_LONGN(x);
// [apparently removed side] Clear the sign bit to get the bits of |x|.
__CLC_LONGN ai = i & 0x7fffffffffffffffL;
// [apparently removed side] Subnormal test: non-zero magnitude whose bits are
// below 0x0010000000000000, i.e. the exponent field is zero.
__CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L;
// scale subnormal by 2^54 without multiplying
// [apparently added side] Same sign-bit mask, now via a named constant.
__CLC_LONGN ai = i & EXSIGNBIT_DP64;
// Scale subnormal by 2^54 without multiplying
__CLC_LONGN is_subnormal = ai > 0 && ai < 0x0010000000000000L;
// 0x0370000000000000 is the bit pattern of 2^-968 (exponent field 55). OR-ing
// the subnormal mantissa m into it gives 2^-968 + m * 2^-1020; subtracting
// 2^-968 leaves m * 2^-1020, which is the subnormal value m * 2^-1074 times
// 2^54.
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968;
// [apparently removed side] Use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_LONGN(s), d);
// [apparently removed side] Biased exponent field minus 1022 (so the fraction
// ends up in [0.5, 1)), minus a further 54 to undo the subnormal rescaling.
__CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 -
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, d);
// [apparently removed side] Special inputs detected from the bits: ai == 0 is
// +/-0, and e == 1025 corresponds to exponent field 2047 (infinity or NaN).
__CLC_LONGN t = ai == 0 | e == 1025;
// [apparently removed side] Rebuild the fraction: sign of x, exponent of 0.5,
// mantissa of the (possibly rescaled) magnitude.
i = (i & (__CLC_LONGN)0x8000000000000000L) |
(__CLC_LONGN)0x3fe0000000000000L |
(ai & (__CLC_LONGN)0x000fffffffffffffL);
// The exponent is computed in 64-bit lanes and converted to the int type of
// *ep on store.
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, t));
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
// [apparently added side] The same steps as above, expressed with
// is_subnormal and the named *_DP64 constants.
ai = __clc_select(ai, __CLC_AS_LONGN(s), is_subnormal);
__CLC_LONGN e = (ai >> EXPSHIFTBITS_DP64) - (__CLC_LONGN)1022 -
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, is_subnormal);
i = (i & (__CLC_LONGN)SIGNBIT_DP64) | (__CLC_LONGN)HALFEXPBITS_DP64 |
(ai & (__CLC_LONGN)MANTBITS_DP64);
// Special inputs are now classified on the floating-point value instead of on
// its bits; per the commit message this lets the optimizer form a single
// @llvm.is.fpclass test.
__CLC_LONGN is_inf_nan_or_zero =
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
// Special inputs: exponent 0 and x returned unchanged.
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, is_inf_nan_or_zero));
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
}
#endif