[libclc] Reduce bithacking in CLC frexp (#129871)
Also replace some magic constants with named ones. Checking against FP zero and using isnan and isinf functions allows the optimizer to create one unified @llvm.is.fpclass intrinsic. This results in fewer, more canonical IR instructions.
This commit is contained in:
parent
adb5d6aeae
commit
760eeac6a2
@ -23,6 +23,8 @@
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/relational/clc_select.h>
|
||||
#include <clc/utils.h>
|
||||
|
||||
|
@ -28,17 +28,22 @@
|
||||
// __clc_frexp, 32-bit float variant (judging by the _SP32 constants and the
// `f`-suffixed literal below).
//
// NOTE(review): this span is a rendered diff. The pre-change lines (locals `d`
// and `t`, literal bit masks) and the post-change lines (locals `is_subnormal`
// and `is_inf_nan_or_zero`, named constants) appear interleaved without +/-
// markers, so `ai`, `e`, the store to `*ep` and the `return` each show up twice.
//
// Splits x into a fraction (returned) and an exponent (stored through ep).
// When x is zero, infinite or NaN, x is returned unchanged and 0 is stored.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
|
||||
// Reinterpret x as its integer bit pattern.
__CLC_INTN i = __CLC_AS_INTN(x);
|
||||
// Pre-change: clear the sign bit with a literal mask.
__CLC_INTN ai = i & 0x7fffffff;
|
||||
// Pre-change: nonzero magnitude below the smallest normal bit pattern.
__CLC_INTN d = ai > 0 & ai < 0x00800000;
|
||||
/* scale subnormal by 2^26 without multiplying */
|
||||
// Post-change: same masking through a named constant (presumably the same
// value as the literal above; confirm against its definition).
__CLC_INTN ai = i & EXSIGNBIT_SP32;
|
||||
|
||||
// Scale subnormal by 2^26 without multiplying
|
||||
__CLC_INTN is_subnormal = ai > 0 && ai < 0x00800000;
|
||||
// 0x0d800000 is the bit pattern of 0x1.0p-100f. OR-ing it into the subnormal's
// bits and then subtracting that same value leaves the input scaled by 2^26.
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f;
|
||||
// Pre-change: use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_INTN(s), d);
|
||||
// Pre-change: exponent field minus 126, minus the 26 introduced by the
// subnormal rescaling.
__CLC_INTN e =
|
||||
(ai >> 23) - 126 - __clc_select((__CLC_INTN)0, (__CLC_INTN)26, d);
|
||||
// Pre-change special-case test on the bits: zero magnitude, or e == 129
// (exponent field all ones, i.e. infinity or NaN).
__CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129;
|
||||
// Pre-change: keep the sign bit, force the exponent field to that of 0.5
// (0x3f000000), keep the mantissa bits.
i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff);
|
||||
*ep = __clc_select(e, (__CLC_INTN)0, t);
|
||||
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
|
||||
// Post-change: use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_INTN(s), is_subnormal);
|
||||
// Post-change: same exponent computation, with the shift amount named.
__CLC_INTN e = (ai >> EXPSHIFTBITS_SP32) - 126 -
|
||||
__clc_select((__CLC_INTN)0, (__CLC_INTN)26, is_subnormal);
|
||||
|
||||
// Post-change: sign | HALFEXPBITS | mantissa, spelled with named constants
// (presumably equal to the literals in the pre-change line; confirm).
i = (i & (__CLC_INTN)SIGNBIT_SP32) | (__CLC_INTN)HALFEXPBITS_SP32 |
|
||||
(ai & (__CLC_INTN)MANTBITS_SP32);
|
||||
|
||||
// Post-change special-case test, done on the floating-point value itself
// instead of on its bit pattern.
__CLC_INTN is_inf_nan_or_zero =
|
||||
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
|
||||
// Special inputs store exponent 0 and return x unchanged.
*ep = __clc_select(e, (__CLC_INTN)0, is_inf_nan_or_zero);
|
||||
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -56,19 +61,22 @@ __clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
|
||||
// __clc_frexp, 64-bit double variant (judging by the _DP64 constants and the
// 64-bit literals below).
//
// NOTE(review): this span is a rendered diff. The pre-change lines (locals `d`
// and `t`, literal bit masks) and the post-change lines (locals `is_subnormal`
// and `is_inf_nan_or_zero`, named constants) appear interleaved without +/-
// markers, so `ai`, `e`, the store to `*ep` and the `return` each show up twice.
//
// Splits x into a fraction (returned) and an exponent (stored through ep).
// When x is zero, infinite or NaN, x is returned unchanged and 0 is stored.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
|
||||
// Reinterpret x as its 64-bit integer bit pattern.
__CLC_LONGN i = __CLC_AS_LONGN(x);
|
||||
// Pre-change: clear the sign bit with a literal mask.
__CLC_LONGN ai = i & 0x7fffffffffffffffL;
|
||||
// Pre-change: nonzero magnitude below the smallest normal bit pattern.
__CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L;
|
||||
// scale subnormal by 2^54 without multiplying
|
||||
// Post-change: same masking through a named constant (presumably the same
// value as the literal above; confirm against its definition).
__CLC_LONGN ai = i & EXSIGNBIT_DP64;
|
||||
|
||||
// Scale subnormal by 2^54 without multiplying
|
||||
__CLC_LONGN is_subnormal = ai > 0 && ai < 0x0010000000000000L;
|
||||
// 0x0370000000000000 is the bit pattern of 0x1.0p-968. OR-ing it into the
// subnormal's bits and then subtracting that same value leaves the input
// scaled by 2^54.
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968;
|
||||
// Pre-change: use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_LONGN(s), d);
|
||||
// Pre-change: exponent field minus 1022, minus the 54 introduced by the
// subnormal rescaling.
__CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 -
|
||||
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, d);
|
||||
// Pre-change special-case test on the bits: zero magnitude, or e == 1025
// (exponent field all ones, i.e. infinity or NaN).
__CLC_LONGN t = ai == 0 | e == 1025;
|
||||
// Pre-change: keep the sign bit, force the exponent field to that of 0.5
// (0x3fe0000000000000), keep the mantissa bits.
i = (i & (__CLC_LONGN)0x8000000000000000L) |
|
||||
(__CLC_LONGN)0x3fe0000000000000L |
|
||||
(ai & (__CLC_LONGN)0x000fffffffffffffL);
|
||||
// The exponent is computed in the 64-bit integer type and converted to the
// int type that ep points to.
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, t));
|
||||
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
|
||||
// Post-change: use the rescaled bits only for subnormal inputs.
ai = __clc_select(ai, __CLC_AS_LONGN(s), is_subnormal);
|
||||
// Post-change: same exponent computation, with the shift amount named.
__CLC_LONGN e = (ai >> EXPSHIFTBITS_DP64) - (__CLC_LONGN)1022 -
|
||||
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, is_subnormal);
|
||||
|
||||
// Post-change: sign | HALFEXPBITS | mantissa, spelled with named constants
// (presumably equal to the literals in the pre-change lines; confirm).
i = (i & (__CLC_LONGN)SIGNBIT_DP64) | (__CLC_LONGN)HALFEXPBITS_DP64 |
|
||||
(ai & (__CLC_LONGN)MANTBITS_DP64);
|
||||
|
||||
// Post-change special-case test, done on the floating-point value itself
// instead of on its bit pattern.
__CLC_LONGN is_inf_nan_or_zero =
|
||||
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
|
||||
// Special inputs store exponent 0 and return x unchanged.
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, is_inf_nan_or_zero));
|
||||
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user