libclc: Update trigpi functions (#187579)
These were originally ported from the ROCm device libs in bc81ebefb7d9d9d71d20bfee2ce4cccb09701e9b. This change merges in more recent changes.
This commit is contained in:
parent
a971089cb8
commit
421bf13e4b
@ -15,6 +15,9 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
|
||||
_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincos_reduced_eval(__CLC_FLOATN x);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincospi_reduced_eval(__CLC_FLOATN x);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
|
||||
__CLC_FLOATN y);
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
|
||||
|
||||
@ -15,5 +15,8 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincos_reduced_eval(__CLC_HALFN x);
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincospi_reduced_eval(__CLC_HALFN x);
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_HALFN *r,
|
||||
__CLC_HALFN x);
|
||||
|
||||
@ -15,19 +15,13 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincos_reduced_eval(__CLC_DOUBLEN x, __CLC_DOUBLEN y);
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincospi_reduced_eval(__CLC_DOUBLEN x);
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_DOUBLEN __clc_tan_reduced_eval(__CLC_DOUBLEN x,
|
||||
__CLC_DOUBLEN y,
|
||||
__CLC_INTN is_odd);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
|
||||
__CLC_DOUBLEN xx,
|
||||
private __CLC_DOUBLEN *sinval,
|
||||
private __CLC_DOUBLEN *cosval);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
|
||||
private __CLC_DOUBLEN *leadval,
|
||||
private __CLC_DOUBLEN *tailval);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_remainder_piby2_small(
|
||||
__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, private __CLC_DOUBLEN *rr);
|
||||
|
||||
|
||||
21
libclc/clc/include/clc/math/clc_sincospi.h
Normal file
21
libclc/clc/include/clc/math/clc_sincospi.h
Normal file
@ -0,0 +1,21 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_SINCOSPI_H__
|
||||
#define __CLC_MATH_CLC_SINCOSPI_H__
|
||||
|
||||
#include "clc/internal/clc.h"
|
||||
|
||||
#define __CLC_BODY "clc/math/unary_decl_with_ptr.inc"
|
||||
#define __CLC_FUNCTION __clc_sincospi
|
||||
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_SINCOSPI_H__
|
||||
17
libclc/clc/include/clc/math/clc_trigpi_helpers.h
Normal file
17
libclc/clc/include/clc/math/clc_trigpi_helpers.h
Normal file
@ -0,0 +1,17 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_TRIGPI_HELPERS_H__
|
||||
#define __CLC_MATH_CLC_TRIGPI_HELPERS_H__
|
||||
|
||||
#include "clc/internal/clc.h"
|
||||
|
||||
#define __CLC_BODY "clc/math/clc_trigpi_helpers_decl.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
#endif // __CLC_MATH_CLC_TRIGPI_HELPERS_H__
|
||||
10
libclc/clc/include/clc/math/clc_trigpi_helpers_decl.inc
Normal file
10
libclc/clc/include/clc/math/clc_trigpi_helpers_decl.inc
Normal file
@ -0,0 +1,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Declaration of the argument-reduction helper shared by the *pi
// trigonometric functions. It takes the input value `x` and a private pointer
// `r`, and returns an integer per element.
// NOTE(review): presumably `r` receives the reduced argument and the return
// value selects the quadrant - confirm against the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_INTN
|
||||
__clc_piArgReductionS(private __CLC_GENTYPE *r, __CLC_GENTYPE x);
|
||||
@ -69,6 +69,9 @@
|
||||
#define __CLC_CONVERT_S_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_S_GENTYPE)
|
||||
#define __CLC_CONVERT_U_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_U_GENTYPE)
|
||||
|
||||
// Value with only the most significant bit of an __CLC_FPSIZE-bit word set,
// cast to __CLC_S_GENTYPE. The shift is performed on an unsigned operand so
// that the 64-bit case never shifts a 1 into the sign bit of a signed
// long long; the cast then yields the signed bit pattern.
#define __CLC_GENTYPE_S_SIGNBIT (__CLC_S_GENTYPE)((1ull << (__CLC_FPSIZE - 1)))
|
||||
// Value with only the most significant bit of an __CLC_FPSIZE-bit word set,
// cast to __CLC_U_GENTYPE.
#define __CLC_GENTYPE_U_SIGNBIT (__CLC_U_GENTYPE)(1ull << (__CLC_FPSIZE - 1u))
|
||||
|
||||
#if (!defined(__CLC_HALF_ONLY) && !defined(__CLC_DOUBLE_ONLY))
|
||||
#define __CLC_SCALAR_GENTYPE float
|
||||
#define __CLC_FPSIZE 32
|
||||
|
||||
@ -156,6 +156,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
|
||||
math/clc_rsqrt.cl
|
||||
math/clc_sin.cl
|
||||
math/clc_sincos.cl
|
||||
math/clc_sincospi.cl
|
||||
math/clc_sincos_helpers.cl
|
||||
math/clc_sinh.cl
|
||||
math/clc_sinpi.cl
|
||||
@ -167,6 +168,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
|
||||
math/clc_tanh.cl
|
||||
math/clc_tanpi.cl
|
||||
math/clc_tgamma.cl
|
||||
math/clc_trigpi_helpers.cl
|
||||
math/clc_trunc.cl
|
||||
mem_fence/clc_mem_fence.cl
|
||||
misc/clc_shuffle.cl
|
||||
|
||||
@ -6,12 +6,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clc/clc_convert.h"
|
||||
#include "clc/float/definitions.h"
|
||||
#include "clc/internal/clc.h"
|
||||
#include "clc/math/clc_fabs.h"
|
||||
#include "clc/math/clc_sincos_helpers.h"
|
||||
#include "clc/math/math.h"
|
||||
#include "clc/math/clc_cospi.h"
|
||||
#include "clc/math/clc_sincospi.h"
|
||||
|
||||
#define __CLC_BODY "clc_cospi.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
@ -6,111 +6,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_INTN ix = __CLC_AS_INTN(absx);
|
||||
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_INTN xodd = (iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0;
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_INTN ir = QNANBITPATT_SP32;
|
||||
|
||||
// 2^24 <= |x| < Inf, the result is always even integer
|
||||
ir = ix < PINFBITPATT_SP32 ? 0x3f800000 : ir;
|
||||
|
||||
// 2^23 <= |x| < 2^24, the result is always integer
|
||||
ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
|
||||
|
||||
// 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0f - r;
|
||||
__CLC_INTN e = 1;
|
||||
__CLC_INTN s = xodd ^ (__CLC_INTN)0x80000000;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_INTN c = r <= 0.75f;
|
||||
a = c ? r - 0.5f : a;
|
||||
e = c ? 0 : e;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5f;
|
||||
a = c ? 0.5f - r : a;
|
||||
s = c ? xodd : s;
|
||||
|
||||
// r <= 0.25
|
||||
c = r <= 0.25f;
|
||||
a = c ? r : a;
|
||||
e = c ? 1 : e;
|
||||
|
||||
__CLC_GENTYPE sinval, cosval;
|
||||
__clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
|
||||
__CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
|
||||
|
||||
ir = ix < 0x4b000000 ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
__CLC_GENTYPE cos;
|
||||
(void)__clc_sincospi(x, &cos);
|
||||
return cos;
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_LONGN ix = __CLC_AS_LONGN(absx);
|
||||
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_LONGN xodd =
|
||||
(iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L;
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_LONGN ir = QNANBITPATT_DP64;
|
||||
|
||||
// 2^53 <= |x| < Inf, the result is always even integer
|
||||
ir = ix < PINFBITPATT_DP64 ? 0x3ff0000000000000L : ir;
|
||||
|
||||
// 2^52 <= |x| < 2^53, the result is always integer
|
||||
ir = absx < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir;
|
||||
|
||||
// 0x1.0p-7 <= |x| < 2^52, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0 - r;
|
||||
__CLC_LONGN e = 1;
|
||||
__CLC_LONGN s = xodd ^ (__CLC_LONGN)0x8000000000000000L;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_LONGN c = r <= 0.75;
|
||||
__CLC_GENTYPE t = r - 0.5;
|
||||
a = c ? t : a;
|
||||
e = c ? 0 : e;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5;
|
||||
t = 0.5 - r;
|
||||
a = c ? t : a;
|
||||
s = c ? xodd : s;
|
||||
|
||||
// r <= 0.25
|
||||
c = r <= 0.25;
|
||||
a = c ? r : a;
|
||||
e = c ? 1 : e;
|
||||
|
||||
__CLC_GENTYPE sinval, cosval;
|
||||
__clc_sincos_piby4(a * M_PI, 0.0, &sinval, &cosval);
|
||||
__CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
|
||||
|
||||
ir = absx < 0x1.0p+52 ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_cospi(__CLC_CONVERT_FLOATN(x)));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -41,6 +41,33 @@ __clc_sincos_reduced_eval(__CLC_FLOATN x) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Single-precision polynomial evaluation of sin(pi * x) and cos(pi * x) for
// an already reduced argument x. Both results are returned together in the
// sin/cos fields of the result struct.
// NOTE(review): the interval on which the polynomials are accurate is not
// stated here; confirm against the argument reduction used by callers.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_GENTYPE x) {
  const __CLC_GENTYPE x2 = x * x;

  // Sine: pi * x + x^3 * P(x^2), with P evaluated by Horner's scheme.
  __CLC_GENTYPE sin_poly = __clc_mad(x2, 0x1.eb5482p-3f, -0x1.3e497cp-1f);
  sin_poly = __clc_mad(x2, sin_poly, 0x1.468e6cp+1f);
  sin_poly = __clc_mad(x2, sin_poly, -0x1.4abc1cp+2f);
  const __CLC_GENTYPE sin_tail = x * x2 * sin_poly;
  const __CLC_GENTYPE sin_value = __clc_mad(x, 0x1.921fb6p+1f, sin_tail);

  // Cosine: 1 + x^2 * Q(x^2), with Q evaluated by Horner's scheme.
  __CLC_GENTYPE cos_poly = __clc_mad(x2, 0x1.97ca88p-5f, 0x1.c85d3ap-3f);
  cos_poly = __clc_mad(x2, cos_poly, -0x1.55a3b4p+0f);
  cos_poly = __clc_mad(x2, cos_poly, 0x1.03c1a6p+2f);
  cos_poly = __clc_mad(x2, cos_poly, -0x1.3bd3ccp+2f);
  const __CLC_GENTYPE cos_value = __clc_mad(x2, cos_poly, 1.0f);

  __CLC_SINCOS_RET_GENTYPE result;
  result.sin = sin_value;
  result.cos = cos_value;
  return result;
}
|
||||
|
||||
// Evaluate single-precision sin and cos of a value in the interval [-pi/4, pi/4]
|
||||
_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
|
||||
private __CLC_FLOATN *sinval,
|
||||
|
||||
@ -34,3 +34,19 @@ __clc_sincos_reduced_eval(__CLC_HALFN x) {
|
||||
ret.sin = __clc_mad(x, t * __clc_mad(t, 0x1.0bp-7h, -0x1.554p-3h), x);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Half-precision polynomial evaluation of sin(pi * x) and cos(pi * x) for an
// already reduced argument x. Both results are returned together in the
// sin/cos fields of the result struct.
// NOTE(review): the interval on which the polynomials are accurate is not
// stated here; confirm against the argument reduction used by callers.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_HALFN x) {
  const __CLC_HALFN x2 = x * x;

  // Sine: pi * x + x^3 * (linear polynomial in x^2).
  const __CLC_HALFN sin_poly = __clc_mad(x2, 0x1.b84p+0h, -0x1.46cp+2h);
  const __CLC_HALFN sin_tail = x * x2 * sin_poly;
  const __CLC_HALFN sin_value = __clc_mad(x, 0x1.92p+1h, sin_tail);

  // Cosine: 1 + x^2 * (linear polynomial in x^2).
  const __CLC_HALFN cos_poly = __clc_mad(x2, 0x1.fbp+1h, -0x1.3bcp+2h);
  const __CLC_HALFN cos_value = __clc_mad(x2, cos_poly, 1.0h);

  __CLC_SINCOS_RET_GENTYPE result;
  result.sin = sin_value;
  result.cos = cos_value;
  return result;
}
|
||||
|
||||
@ -41,6 +41,29 @@ __clc_sincos_reduced_eval(__CLC_DOUBLEN x, __CLC_DOUBLEN y) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
|
||||
__clc_sincospi_reduced_eval(__CLC_GENTYPE x) {
|
||||
__CLC_GENTYPE t = x * x;
|
||||
|
||||
__CLC_GENTYPE sx = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
|
||||
__clc_mad(t,
|
||||
0x1.e357ef99eb0bbp-12, -0x1.e2fe76fdffd2bp-8), 0x1.50782d5f14825p-4), -0x1.32d2ccdfe9424p-1),
|
||||
0x1.466bc67754fffp+1), -0x1.4abbce625be09p+2);
|
||||
sx = x * t * sx;
|
||||
sx = __clc_mad(x, 0x1.921fb54442d18p+1, sx);
|
||||
|
||||
__CLC_GENTYPE cx = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
|
||||
__clc_mad(t, __clc_mad(t,
|
||||
-0x1.b167302e21c33p-14, 0x1.f9c89ca1d4f33p-10), -0x1.a6d1e7294bff9p-6), 0x1.e1f5067b90b37p-3),
|
||||
-0x1.55d3c7e3c325bp+0), 0x1.03c1f081b5a67p+2), -0x1.3bd3cc9be45dep+2);
|
||||
cx = __clc_mad(t, cx, 1.0);
|
||||
|
||||
__CLC_SINCOS_RET_GENTYPE ret;
|
||||
ret.cos = cx;
|
||||
ret.sin = sx;
|
||||
return ret;
|
||||
}
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_DOUBLEN __clc_tan_reduced_eval(__CLC_DOUBLEN x,
|
||||
__CLC_DOUBLEN xx,
|
||||
__CLC_INTN is_odd) {
|
||||
@ -123,68 +146,6 @@ _CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
|
||||
*cosval = cp;
|
||||
}
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
|
||||
private __CLC_DOUBLEN *leadval,
|
||||
private __CLC_DOUBLEN *tailval) {
|
||||
// 0x3fe921fb54442d18
|
||||
const __CLC_DOUBLEN piby4_lead = 7.85398163397448278999e-01;
|
||||
// 0x3c81a62633145c06
|
||||
const __CLC_DOUBLEN piby4_tail = 3.06161699786838240164e-17;
|
||||
|
||||
// In order to maintain relative precision transform using the identity:
|
||||
// tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
|
||||
// Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
|
||||
|
||||
__CLC_LONGN ca = x > 0.68;
|
||||
__CLC_LONGN cb = x < -0.68;
|
||||
__CLC_DOUBLEN transform = ca ? 1.0 : 0.0;
|
||||
transform = cb ? -1.0 : transform;
|
||||
|
||||
__CLC_DOUBLEN tx = __clc_fma(-transform, x, piby4_lead) +
|
||||
__clc_fma(-transform, xx, piby4_tail);
|
||||
__CLC_LONGN c = ca | cb;
|
||||
x = c ? tx : x;
|
||||
xx = c ? 0.0 : xx;
|
||||
|
||||
// Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
|
||||
__CLC_DOUBLEN t1 = x;
|
||||
__CLC_DOUBLEN r = __clc_fma(2.0, x * xx, x * x);
|
||||
|
||||
__CLC_DOUBLEN a = __clc_fma(r,
|
||||
__clc_fma(r, 0.224044448537022097264602535574e-3,
|
||||
-0.229345080057565662883358588111e-1),
|
||||
0.372379159759792203640806338901e0);
|
||||
|
||||
__CLC_DOUBLEN b =
|
||||
__clc_fma(r,
|
||||
__clc_fma(r,
|
||||
__clc_fma(r, -0.232371494088563558304549252913e-3,
|
||||
0.260656620398645407524064091208e-1),
|
||||
-0.515658515729031149329237816945e0),
|
||||
0.111713747927937668539901657944e1);
|
||||
|
||||
__CLC_DOUBLEN t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
|
||||
|
||||
__CLC_DOUBLEN tp = t1 + t2;
|
||||
|
||||
// Compute -1.0/(t1 + t2) accurately
|
||||
__CLC_DOUBLEN z1 =
|
||||
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
|
||||
__CLC_DOUBLEN z2 = t2 - (z1 - t1);
|
||||
__CLC_DOUBLEN trec = -MATH_RECIP(tp);
|
||||
__CLC_DOUBLEN trec_top =
|
||||
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
|
||||
|
||||
__CLC_DOUBLEN tpr = __clc_fma(
|
||||
__clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
|
||||
|
||||
__CLC_DOUBLEN tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
|
||||
__CLC_DOUBLEN tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
|
||||
|
||||
*leadval = c ? tpt : tp;
|
||||
*tailval = c ? tptr : tpr;
|
||||
}
|
||||
|
||||
// Reduction for small sized arguments
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_remainder_piby2_small(
|
||||
__CLC_DOUBLEN x, private __CLC_DOUBLEN *rh, private __CLC_DOUBLEN *rt) {
|
||||
|
||||
22
libclc/clc/lib/generic/math/clc_sincospi.cl
Normal file
22
libclc/clc/lib/generic/math/clc_sincospi.cl
Normal file
@ -0,0 +1,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clc/clc_convert.h"
|
||||
#include "clc/float/definitions.h"
|
||||
#include "clc/internal/clc.h"
|
||||
#include "clc/math/clc_cos.h"
|
||||
#include "clc/math/clc_fabs.h"
|
||||
#include "clc/math/clc_sin.h"
|
||||
#include "clc/math/clc_sincos_helpers.h"
|
||||
#include "clc/math/clc_trigpi_helpers.h"
|
||||
#include "clc/math/math.h"
|
||||
#include "clc/relational/clc_isinf.h"
|
||||
#include "clc/relational/clc_select.h"
|
||||
|
||||
#define __CLC_BODY "clc_sincospi.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
50
libclc/clc/lib/generic/math/clc_sincospi.inc
Normal file
50
libclc/clc/lib/generic/math/clc_sincospi.inc
Normal file
@ -0,0 +1,50 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
|
||||
__clc_sincospi(__CLC_GENTYPE x, __private __CLC_GENTYPE *cos_out) {
|
||||
x = __clc_select(x, __CLC_GENTYPE_NAN,
|
||||
__CLC_CONVERT_S_GENTYPE(__clc_isinf(x)));
|
||||
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
|
||||
__CLC_GENTYPE reduced;
|
||||
__CLC_INTN n = __clc_piArgReductionS(&reduced, absx);
|
||||
|
||||
__CLC_SINCOS_RET_GENTYPE eval = __clc_sincospi_reduced_eval(reduced);
|
||||
|
||||
__CLC_S_GENTYPE flip = __CLC_CONVERT_S_GENTYPE(n > 1)
|
||||
? __CLC_GENTYPE_S_SIGNBIT
|
||||
: (__CLC_S_GENTYPE)0;
|
||||
__CLC_S_GENTYPE odd = __CLC_CONVERT_S_GENTYPE((n & 1) != 0);
|
||||
__CLC_GENTYPE s = odd ? eval.cos : eval.sin;
|
||||
|
||||
__CLC_S_GENTYPE sin_val = __CLC_AS_S_GENTYPE(s) ^ flip ^
|
||||
(__CLC_AS_S_GENTYPE(absx) ^ __CLC_AS_S_GENTYPE(x));
|
||||
|
||||
__CLC_GENTYPE c = odd ? -eval.sin : eval.cos;
|
||||
*cos_out =
|
||||
__CLC_AS_GENTYPE(__CLC_CONVERT_S_GENTYPE(__CLC_AS_S_GENTYPE(c) ^ flip));
|
||||
return __CLC_AS_GENTYPE(sin_val);
|
||||
}
|
||||
|
||||
#define __CLC_SINCOSPI_DEF(addrspace) \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sincospi( \
|
||||
__CLC_GENTYPE x, addrspace __CLC_GENTYPE *cos_out) { \
|
||||
\
|
||||
__CLC_GENTYPE cos_result; \
|
||||
__CLC_GENTYPE sin_result = __clc_sincospi(x, &cos_result); \
|
||||
*cos_out = cos_result; \
|
||||
return sin_result; \
|
||||
}
|
||||
|
||||
__CLC_SINCOSPI_DEF(local)
|
||||
__CLC_SINCOSPI_DEF(global)
|
||||
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
|
||||
__CLC_SINCOSPI_DEF(generic)
|
||||
#endif
|
||||
@ -6,12 +6,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clc/clc_convert.h"
|
||||
#include "clc/float/definitions.h"
|
||||
#include "clc/internal/clc.h"
|
||||
#include "clc/math/clc_fabs.h"
|
||||
#include "clc/math/clc_sincos_helpers.h"
|
||||
#include "clc/math/math.h"
|
||||
#include "clc/math/clc_sincospi.h"
|
||||
#include "clc/math/clc_sinpi.h"
|
||||
|
||||
#define __CLC_BODY "clc_sinpi.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
@ -6,109 +6,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
|
||||
__CLC_INTN ix = __CLC_AS_INTN(x);
|
||||
__CLC_INTN xsgn = ix & (__CLC_INTN)0x80000000;
|
||||
ix ^= xsgn;
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_INTN xodd =
|
||||
xsgn ^ ((iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0);
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_INTN ir = QNANBITPATT_SP32;
|
||||
|
||||
// 2^23 <= |x| < Inf, the result is always integer
|
||||
ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
|
||||
|
||||
// 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0f - r;
|
||||
__CLC_INTN e = 0;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_INTN c = r <= 0.75f;
|
||||
a = c ? r - 0.5f : a;
|
||||
e = c ? 1 : e;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5f;
|
||||
a = c ? 0.5f - r : a;
|
||||
|
||||
// 0 < r <= 0.25
|
||||
c = r <= 0.25f;
|
||||
a = c ? r : a;
|
||||
e = c ? 0 : e;
|
||||
|
||||
__CLC_GENTYPE sinval, cosval;
|
||||
__clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
|
||||
__CLC_INTN jr = xodd ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
|
||||
|
||||
ir = ix < 0x4b000000 ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
__CLC_GENTYPE unused_cos;
|
||||
return __clc_sincospi(x, &unused_cos);
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
|
||||
__CLC_LONGN ix = __CLC_AS_LONGN(x);
|
||||
__CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L;
|
||||
ix ^= xsgn;
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_LONGN xodd =
|
||||
xsgn ^
|
||||
((iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L);
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_LONGN ir = QNANBITPATT_DP64;
|
||||
|
||||
// 2^23 <= |x| < Inf, the result is always integer
|
||||
ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
|
||||
|
||||
// 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0 - r;
|
||||
__CLC_LONGN e = 0;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_LONGN c = r <= 0.75;
|
||||
__CLC_GENTYPE t = r - 0.5;
|
||||
a = c ? t : a;
|
||||
e = c ? 1 : e;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5;
|
||||
t = 0.5 - r;
|
||||
a = c ? t : a;
|
||||
|
||||
// r <= 0.25
|
||||
c = r <= 0.25;
|
||||
a = c ? r : a;
|
||||
e = c ? 0 : e;
|
||||
|
||||
__CLC_GENTYPE api = a * M_PI;
|
||||
|
||||
__CLC_GENTYPE sinval, cosval;
|
||||
__clc_sincos_piby4(api, 0.0, &sinval, &cosval);
|
||||
__CLC_LONGN jr = xodd ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
|
||||
|
||||
ir = absx < 0x1.0p+52 ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_sinpi(__CLC_CONVERT_FLOATN(x)));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -10,9 +10,13 @@
|
||||
#include "clc/float/definitions.h"
|
||||
#include "clc/internal/clc.h"
|
||||
#include "clc/math/clc_fabs.h"
|
||||
#include "clc/math/clc_mad.h"
|
||||
#include "clc/math/clc_native_recip.h"
|
||||
#include "clc/math/clc_sincos_helpers.h"
|
||||
#include "clc/math/clc_trigpi_helpers.h"
|
||||
#include "clc/math/math.h"
|
||||
#include "clc/relational/clc_isinf.h"
|
||||
#include "clc/relational/clc_select.h"
|
||||
|
||||
#define __CLC_BODY "clc_tanpi.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
|
||||
@ -8,125 +8,82 @@
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
|
||||
__CLC_INTN ix = __CLC_AS_INTN(x);
|
||||
__CLC_INTN xsgn = ix & (__CLC_INTN)SIGNBIT_SP32;
|
||||
__CLC_INTN xnsgn = xsgn ^ (__CLC_INTN)SIGNBIT_SP32;
|
||||
ix ^= xsgn;
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_INTN xodd = xsgn ^ __CLC_AS_INTN((iax & 0x1) != 0 ? SIGNBIT_SP32 : 0);
|
||||
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE
|
||||
__clc_tanpi_reduced_eval(__CLC_GENTYPE x, __CLC_INTN is_odd) {
|
||||
__CLC_GENTYPE s = x * x;
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_INTN ir = QNANBITPATT_SP32;
|
||||
__CLC_GENTYPE t = __clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
|
||||
__clc_mad(s, __clc_mad(s,
|
||||
0x1.7d2bd4p+16f, 0x1.a4d306p+12f), 0x1.435004p+11f), 0x1.4b6926p+9f),
|
||||
0x1.451e22p+7f), 0x1.467a9cp+5f), 0x1.4abb6ap+3f);
|
||||
|
||||
// 2^24 <= |x| < Inf, the result is always even integer
|
||||
ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
|
||||
t = x * s * t;
|
||||
t = __clc_mad(x, 0x1.921fb6p+1f, t);
|
||||
|
||||
// 2^23 <= |x| < 2^24, the result is always integer
|
||||
ir = ix < 0x4b800000 ? xodd : ir;
|
||||
__CLC_GENTYPE tr = __CLC_FP_LIT(-1.0) / t;
|
||||
|
||||
// 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0f - r;
|
||||
__CLC_INTN e = 0;
|
||||
__CLC_INTN s = xnsgn;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_INTN c = r <= 0.75f;
|
||||
a = c ? r - 0.5f : a;
|
||||
e = c ? 1 : e;
|
||||
s = c ? xsgn : s;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5f;
|
||||
a = c ? 0.5f - r : a;
|
||||
s = c ? xnsgn : s;
|
||||
|
||||
// 0 < r <= 0.25
|
||||
c = r <= 0.25f;
|
||||
a = c ? r : a;
|
||||
e = c ? 0 : e;
|
||||
s = c ? xsgn : s;
|
||||
|
||||
__CLC_GENTYPE t = __clc_tanf_piby4(a * M_PI_F, 0);
|
||||
__CLC_GENTYPE tr = -__clc_native_recip(t);
|
||||
__CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? tr : t);
|
||||
|
||||
jr = r == 0.5f ? xodd | 0x7f800000 : jr;
|
||||
|
||||
ir = ix < 0x4b000000 ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
return is_odd ? tr : t;
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
|
||||
__CLC_LONGN ix = __CLC_AS_LONGN(x);
|
||||
__CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L;
|
||||
__CLC_LONGN xnsgn = xsgn ^ (__CLC_LONGN)0x8000000000000000L;
|
||||
ix ^= xsgn;
|
||||
__CLC_GENTYPE absx = __clc_fabs(x);
|
||||
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
|
||||
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
|
||||
__CLC_LONGN xodd =
|
||||
xsgn ^ __CLC_AS_LONGN((iax & 0x1) != 0 ? 0x8000000000000000L : 0L);
|
||||
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN
|
||||
__clc_tanpi_reduced_eval(__CLC_DOUBLEN x, __CLC_INTN is_odd) {
|
||||
__CLC_GENTYPE s = x * x;
|
||||
__CLC_GENTYPE t = __clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
|
||||
__clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
|
||||
__clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
|
||||
__clc_mad(s,
|
||||
0x1.3fad0a71ea6d1p+32, -0x1.11a76ac97377bp+30), 0x1.ba2bcaca6da1bp+27), -0x1.79e8e2d7aaf57p+22),
|
||||
0x1.c1c1102e46eccp+21), 0x1.31291bbcb5588p+19), 0x1.486b2d6bb3db2p+17), 0x1.45be1b46ff156p+15),
|
||||
0x1.45f61b419c746p+13), 0x1.45f311045a4ffp+11), 0x1.45f4739a998c7p+9), 0x1.45fff9b243050p+7),
|
||||
0x1.466bc6775cf74p+5), 0x1.4abbce625be8bp+3);
|
||||
t = x * s * t;
|
||||
t = __clc_mad(x, 0x1.921fb54442d18p+1, t);
|
||||
|
||||
// Initialize with return for +-Inf and NaN
|
||||
__CLC_LONGN ir = QNANBITPATT_DP64;
|
||||
__CLC_GENTYPE tr = __CLC_FP_LIT(-1.0) / t;
|
||||
|
||||
// 2^53 <= |x| < Inf, the result is always even integer
|
||||
ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
|
||||
|
||||
// 2^52 <= |x| < 2^53, the result is always integer
|
||||
ir = ix < 0x4340000000000000L ? xodd : ir;
|
||||
|
||||
// 0x1.0p-14 <= |x| < 2^53, result depends on which 0.25 interval
|
||||
|
||||
// r < 1.0
|
||||
__CLC_GENTYPE a = 1.0 - r;
|
||||
__CLC_LONGN e = 0;
|
||||
__CLC_LONGN s = xnsgn;
|
||||
|
||||
// r <= 0.75
|
||||
__CLC_LONGN c = r <= 0.75;
|
||||
__CLC_GENTYPE t = r - 0.5;
|
||||
a = c ? t : a;
|
||||
e = c ? 1 : e;
|
||||
s = c ? xsgn : s;
|
||||
|
||||
// r < 0.5
|
||||
c = r < 0.5;
|
||||
t = 0.5 - r;
|
||||
a = c ? t : a;
|
||||
s = c ? xnsgn : s;
|
||||
|
||||
// r <= 0.25
|
||||
c = r <= 0.25;
|
||||
a = c ? r : a;
|
||||
e = c ? 0 : e;
|
||||
s = c ? xsgn : s;
|
||||
|
||||
__CLC_GENTYPE api = a * M_PI;
|
||||
__CLC_GENTYPE lo, hi;
|
||||
__clc_tan_piby4(api, 0.0, &lo, &hi);
|
||||
__CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? hi : lo);
|
||||
|
||||
__CLC_LONGN si = xodd | 0x7ff0000000000000L;
|
||||
jr = r == 0.5 ? si : jr;
|
||||
|
||||
ir = ix < 0x4330000000000000L ? jr : ir;
|
||||
|
||||
return __CLC_AS_GENTYPE(ir);
|
||||
return __CLC_CONVERT_LONGN(is_odd) ? tr : t;
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_tanpi(__CLC_CONVERT_FLOATN(x)));
|
||||
// Half-precision polynomial evaluation of tan(pi * x) for an already reduced
// argument x. Where is_odd is set the result is -1 / tan(pi * x) instead.
// NOTE(review): the interval on which the polynomial is accurate is not
// stated here; confirm against the argument reduction used by callers.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN
__clc_tanpi_reduced_eval(__CLC_HALFN x, __CLC_INTN is_odd) {
  const __CLC_HALFN x2 = x * x;

  // tan(pi * x) ~= pi * x + x^3 * P(x^2), with P evaluated by Horner's scheme.
  __CLC_HALFN poly = __clc_mad(x2, 0x1.3d8p+8h, 0x1.fe4p+4h);
  poly = __clc_mad(x2, poly, 0x1.508p+3h);
  const __CLC_HALFN tail = x * x2 * poly;
  const __CLC_HALFN tan_val = __clc_mad(x, 0x1.92p+1h, tail);

  const __CLC_HALFN neg_recip = __CLC_FP_LIT(-1.0) / tan_val;

  return __CLC_CONVERT_SHORTN(is_odd) ? neg_recip : tan_val;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Computes tan(pi * x): reduce |x|, evaluate the reduced-argument polynomial
// (or its negative reciprocal in odd quadrants), then fix up the sign on the
// integer bit pattern.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
  // Replace +-Inf by NaN up front so the NaN flows through to the result.
  x = __clc_select(x, __CLC_GENTYPE_NAN,
                   __CLC_CONVERT_S_GENTYPE(__clc_isinf(x)));

  __CLC_GENTYPE frac;
  const __CLC_INTN quadrant = __clc_piArgReductionS(&frac, __clc_fabs(x));

  const __CLC_GENTYPE magnitude =
      __clc_tanpi_reduced_eval(frac, (quadrant & 1) != 0);

  // When the reduced argument is exactly zero in quadrant 1 or 2, the sign of
  // the evaluated zero/infinity is flipped.
  const __CLC_S_GENTYPE in_q1_or_q2 =
      __CLC_CONVERT_S_GENTYPE((quadrant == 1) || (quadrant == 2));
  const __CLC_S_GENTYPE negate =
      (in_q1_or_q2 && (frac == __CLC_FP_LIT(0.0))) ? __CLC_GENTYPE_S_SIGNBIT
                                                   : (__CLC_S_GENTYPE)0;

  // tan is odd, so the sign bit of x is folded back in at the end.
  const __CLC_S_GENTYPE input_sign =
      __CLC_AS_S_GENTYPE(x) & __CLC_GENTYPE_S_SIGNBIT;
  const __CLC_S_GENTYPE bits =
      (__CLC_AS_S_GENTYPE(magnitude) ^ negate) ^ input_sign;

  return __CLC_AS_GENTYPE(bits);
}
|
||||
|
||||
16
libclc/clc/lib/generic/math/clc_trigpi_helpers.cl
Normal file
16
libclc/clc/lib/generic/math/clc_trigpi_helpers.cl
Normal file
@ -0,0 +1,16 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clc/clc_convert.h"
|
||||
#include "clc/math/clc_fract.h"
|
||||
#include "clc/math/clc_mad.h"
|
||||
#include "clc/math/clc_rint.h"
|
||||
#include "clc/math/clc_trigpi_helpers.h"
|
||||
|
||||
#define __CLC_BODY "clc_trigpi_helpers.inc"
|
||||
#include "clc/math/gentype.inc"
|
||||
19
libclc/clc/lib/generic/math/clc_trigpi_helpers.inc
Normal file
19
libclc/clc/lib/generic/math/clc_trigpi_helpers.inc
Normal file
@ -0,0 +1,19 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_INTN
|
||||
__clc_piArgReductionS(private __CLC_GENTYPE *reduced, __CLC_GENTYPE x) {
|
||||
__CLC_GENTYPE unused;
|
||||
__CLC_GENTYPE t =
|
||||
__CLC_FP_LIT(2.0) * __clc_fract(__CLC_FP_LIT(0.5) * x, &unused);
|
||||
x = x > __CLC_FP_LIT(1.0) ? t : x;
|
||||
t = __clc_rint(__CLC_FP_LIT(2.0) * x);
|
||||
|
||||
*reduced = __clc_mad(t, __CLC_FP_LIT(-0.5), x);
|
||||
return __CLC_CONVERT_INTN(t) & 0x3;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user