libclc: Update trigpi functions (#187579)

These functions were originally ported from the ROCm device
libs in bc81ebefb7d9d9d71d20bfee2ce4cccb09701e9b.
Merge in the more recent changes.
This commit is contained in:
Matt Arsenault 2026-03-20 08:24:23 +01:00 committed by GitHub
parent a971089cb8
commit 421bf13e4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 310 additions and 398 deletions

View File

@ -15,6 +15,9 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
// Single-precision sin/cos evaluation helper; returns both results in one
// struct. NOTE(review): the accepted range of x is not visible from this
// declaration -- confirm against the definition.
_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincos_reduced_eval(__CLC_FLOATN x);
// Single-precision helper for the *pi trig functions; returns a sine and a
// cosine result for x in one struct. NOTE(review): x is presumably the
// reduced argument produced by __clc_piArgReductionS -- confirm.
_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_FLOATN x);
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
__CLC_FLOATN y);
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,

View File

@ -15,5 +15,8 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
// Half-precision sin/cos evaluation helper; returns both results in one
// struct. NOTE(review): the accepted range of x is not visible here.
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
__clc_sincos_reduced_eval(__CLC_HALFN x);
// Half-precision helper for the *pi trig functions; returns a sine and a
// cosine result for x in one struct. NOTE(review): x is presumably the
// reduced argument produced by __clc_piArgReductionS -- confirm.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_HALFN x);
// Half-precision argument reduction: takes x, writes through r and returns an
// integer. NOTE(review): the meaning of the returned integer is not visible
// here -- confirm against the definition.
_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_HALFN *r,
__CLC_HALFN x);

View File

@ -15,19 +15,13 @@ typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) {
// Double-precision sin/cos evaluation helper taking two doubles.
// NOTE(review): presumably (x, y) is a head/tail pair of the reduced
// argument -- confirm against the definition.
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
__clc_sincos_reduced_eval(__CLC_DOUBLEN x, __CLC_DOUBLEN y);
// Double-precision helper for the *pi trig functions; returns a sine and a
// cosine result for x in one struct.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_DOUBLEN x);
// Double-precision tangent evaluation helper; is_odd is an integer selector.
// NOTE(review): presumably is_odd picks tan versus -1/tan -- confirm.
_CLC_DEF _CLC_OVERLOAD __CLC_DOUBLEN __clc_tan_reduced_eval(__CLC_DOUBLEN x,
__CLC_DOUBLEN y,
__CLC_INTN is_odd);
// Double-precision sin/cos kernel; the two results are written through sinval
// and cosval rather than returned.
_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
__CLC_DOUBLEN xx,
private __CLC_DOUBLEN *sinval,
private __CLC_DOUBLEN *cosval);
// Double-precision tangent kernel; two results are written through leadval and
// tailval rather than returned.
_CLC_DECL _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
private __CLC_DOUBLEN *leadval,
private __CLC_DOUBLEN *tailval);
// Argument reduction for small arguments: returns an integer and writes two
// doubles through r and rr. NOTE(review): presumably the quadrant index and a
// head/tail remainder -- confirm against the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_remainder_piby2_small(
__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, private __CLC_DOUBLEN *rr);

View File

@ -0,0 +1,21 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declaration header for __clc_sincospi. The declarations are not written out
// here: __CLC_BODY names a body template and __CLC_FUNCTION names the function,
// and both are consumed by the gentype.inc include below.
// NOTE(review): presumably gentype.inc expands the body once per supported
// floating-point type -- confirm against gentype.inc.
#ifndef __CLC_MATH_CLC_SINCOSPI_H__
#define __CLC_MATH_CLC_SINCOSPI_H__
#include "clc/internal/clc.h"
#define __CLC_BODY "clc/math/unary_decl_with_ptr.inc"
#define __CLC_FUNCTION __clc_sincospi
#include "clc/math/gentype.inc"
// Keep the function-name macro from leaking into files that include this one.
#undef __CLC_FUNCTION
#endif // __CLC_MATH_CLC_SINCOSPI_H__

View File

@ -0,0 +1,17 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declaration header for the helpers shared by the *pi trig functions. The
// declarations come from clc_trigpi_helpers_decl.inc, which is handed to
// gentype.inc through __CLC_BODY.
// NOTE(review): presumably gentype.inc expands the body once per supported
// floating-point type -- confirm against gentype.inc.
#ifndef __CLC_MATH_CLC_TRIGPI_HELPERS_H__
#define __CLC_MATH_CLC_TRIGPI_HELPERS_H__
#include "clc/internal/clc.h"
#define __CLC_BODY "clc/math/clc_trigpi_helpers_decl.inc"
#include "clc/math/gentype.inc"
#endif // __CLC_MATH_CLC_TRIGPI_HELPERS_H__

View File

@ -0,0 +1,10 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Argument reduction shared by the *pi trig functions: takes x, writes a
// reduced value through r (a private-address-space pointer) and returns an
// integer. NOTE(review): presumably the return value is a quadrant index --
// confirm against the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_INTN
__clc_piArgReductionS(private __CLC_GENTYPE *r, __CLC_GENTYPE x);

View File

@ -69,6 +69,9 @@
// Conversion function names for the signed/unsigned integer types paired with
// the current gentype, built by token-pasting "__clc_convert_" onto the type.
#define __CLC_CONVERT_S_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_S_GENTYPE)
#define __CLC_CONVERT_U_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_U_GENTYPE)
// Mask with only bit (__CLC_FPSIZE - 1) set, i.e. the sign bit of the current
// floating-point type, expressed in the signed and unsigned integer types.
// NOTE(review): for __CLC_FPSIZE == 64 the signed form shifts 1ll into bit 63;
// confirm this is well-defined in the language dialect the file is built as.
#define __CLC_GENTYPE_S_SIGNBIT (__CLC_S_GENTYPE)((1ll << (__CLC_FPSIZE - 1)))
#define __CLC_GENTYPE_U_SIGNBIT (__CLC_U_GENTYPE)((1ull << (__CLC_FPSIZE - 1u)))
#if (!defined(__CLC_HALF_ONLY) && !defined(__CLC_DOUBLE_ONLY))
#define __CLC_SCALAR_GENTYPE float
#define __CLC_FPSIZE 32

View File

@ -156,6 +156,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
math/clc_rsqrt.cl
math/clc_sin.cl
math/clc_sincos.cl
math/clc_sincospi.cl
math/clc_sincos_helpers.cl
math/clc_sinh.cl
math/clc_sinpi.cl
@ -167,6 +168,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
math/clc_tanh.cl
math/clc_tanpi.cl
math/clc_tgamma.cl
math/clc_trigpi_helpers.cl
math/clc_trunc.cl
mem_fence/clc_mem_fence.cl
misc/clc_shuffle.cl

View File

@ -6,12 +6,8 @@
//
//===----------------------------------------------------------------------===//
#include "clc/clc_convert.h"
#include "clc/float/definitions.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_fabs.h"
#include "clc/math/clc_sincos_helpers.h"
#include "clc/math/math.h"
#include "clc/math/clc_cospi.h"
#include "clc/math/clc_sincospi.h"
#define __CLC_BODY "clc_cospi.inc"
#include "clc/math/gentype.inc"

View File

@ -6,111 +6,8 @@
//
//===----------------------------------------------------------------------===//
#if __CLC_FPSIZE == 32
// Single-precision cospi. The result is assembled as a bit pattern in `ir`
// through a cascade of selects, each one overriding the previous for a smaller
// range of |x|.
// NOTE(review): as written, the three statements after
// `return __CLC_AS_GENTYPE(ir);` are unreachable. They look like the
// replacement body (delegating to __clc_sincospi) from a diff rendered without
// +/- markers -- confirm which body is intended.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
__CLC_GENTYPE absx = __clc_fabs(x);
__CLC_INTN ix = __CLC_AS_INTN(absx);
// Integer part of |x| and the remaining fraction r = |x| - trunc(|x|).
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
// Sign-bit mask that is set when the integer part is odd.
__CLC_INTN xodd = (iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0;
// Initialize with return for +-Inf and NaN
__CLC_INTN ir = QNANBITPATT_SP32;
// 2^24 <= |x| < Inf: x is always an even integer, so the result is +1.0f
ir = ix < PINFBITPATT_SP32 ? 0x3f800000 : ir;
// 2^23 <= |x| < 2^24: x is always an integer, so the result is +-1.0f by
// parity
ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
// |x| < 2^23, result depends on which 0.25 interval r falls in.
// `a` is the argument handed to the kernel, `e` selects cos (non-zero) or sin
// (zero) of it, and `s` is the sign bit XORed into the result.
// r < 1.0
__CLC_GENTYPE a = 1.0f - r;
__CLC_INTN e = 1;
__CLC_INTN s = xodd ^ (__CLC_INTN)0x80000000;
// r <= 0.75
__CLC_INTN c = r <= 0.75f;
a = c ? r - 0.5f : a;
e = c ? 0 : e;
// r < 0.5
c = r < 0.5f;
a = c ? 0.5f - r : a;
s = c ? xodd : s;
// r <= 0.25
c = r <= 0.25f;
a = c ? r : a;
e = c ? 1 : e;
__CLC_GENTYPE sinval, cosval;
__clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
__CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
ir = ix < 0x4b000000 ? jr : ir;
return __CLC_AS_GENTYPE(ir);
// Unreachable as written (see the note above the function).
__CLC_GENTYPE cos;
(void)__clc_sincospi(x, &cos);
return cos;
}
#elif __CLC_FPSIZE == 64
// Double-precision cospi. The result is assembled as a bit pattern in `ir`
// through a cascade of selects, each one overriding the previous for a smaller
// range of |x|.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
__CLC_GENTYPE absx = __clc_fabs(x);
__CLC_LONGN ix = __CLC_AS_LONGN(absx);
// Integer part of |x| and the remaining fraction r = |x| - trunc(|x|).
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
// Sign-bit mask that is set when the integer part is odd.
__CLC_LONGN xodd =
(iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L;
// Initialize with return for +-Inf and NaN
__CLC_LONGN ir = QNANBITPATT_DP64;
// 2^53 <= |x| < Inf: x is always an even integer, so the result is +1.0
ir = ix < PINFBITPATT_DP64 ? 0x3ff0000000000000L : ir;
// 2^52 <= |x| < 2^53: x is always an integer, so the result is +-1.0 by parity
ir = absx < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir;
// |x| < 2^52, result depends on which 0.25 interval r falls in.
// `a` is the argument handed to the kernel, `e` selects cos (non-zero) or sin
// (zero) of it, and `s` is the sign bit XORed into the result.
// r < 1.0
__CLC_GENTYPE a = 1.0 - r;
__CLC_LONGN e = 1;
__CLC_LONGN s = xodd ^ (__CLC_LONGN)0x8000000000000000L;
// r <= 0.75
__CLC_LONGN c = r <= 0.75;
__CLC_GENTYPE t = r - 0.5;
a = c ? t : a;
e = c ? 0 : e;
// r < 0.5
c = r < 0.5;
t = 0.5 - r;
a = c ? t : a;
s = c ? xodd : s;
// r <= 0.25
c = r <= 0.25;
a = c ? r : a;
e = c ? 1 : e;
__CLC_GENTYPE sinval, cosval;
// The second argument (0.0) is the `xx` parameter of the fp64 kernel.
__clc_sincos_piby4(a * M_PI, 0.0, &sinval, &cosval);
__CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
ir = absx < 0x1.0p+52 ? jr : ir;
return __CLC_AS_GENTYPE(ir);
}
#elif __CLC_FPSIZE == 16
// Half-precision cospi: converts x to the float type of the same width, calls
// the float overload of __clc_cospi, and converts the result back.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
return __CLC_CONVERT_GENTYPE(__clc_cospi(__CLC_CONVERT_FLOATN(x)));
}
#endif

View File

@ -41,6 +41,33 @@ __clc_sincos_reduced_eval(__CLC_FLOATN x) {
return ret;
}
// Single-precision polynomial evaluation of a sine/cosine pair for the *pi
// trig functions. Both polynomials are evaluated in x^2 with Horner's scheme:
//   sin: 0x1.921fb6p+1f * x + x^3 * P(x^2)   (leading coefficient is pi)
//   cos: 1 + x^2 * Q(x^2)
// The low-order coefficients match pi, -pi^3/6 and -pi^2/2, i.e. the series of
// sin(pi*x) and cos(pi*x). NOTE(review): x is presumably the reduced argument
// from __clc_piArgReductionS (|x| <= 0.25) -- confirm against callers.
// Returns a struct with the .sin and .cos fields filled in.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_GENTYPE x) {
  __CLC_GENTYPE x2 = x * x;

  // Sine: cubic in x^2, scaled by x^3, then added to pi * x.
  __CLC_GENTYPE sin_poly = __clc_mad(x2, 0x1.eb5482p-3f, -0x1.3e497cp-1f);
  sin_poly = __clc_mad(x2, sin_poly, 0x1.468e6cp+1f);
  sin_poly = __clc_mad(x2, sin_poly, -0x1.4abc1cp+2f);
  __CLC_GENTYPE sin_tail = x * x2 * sin_poly;
  __CLC_GENTYPE sin_val = __clc_mad(x, 0x1.921fb6p+1f, sin_tail);

  // Cosine: quartic in x^2, scaled by x^2, then added to 1.
  __CLC_GENTYPE cos_poly = __clc_mad(x2, 0x1.97ca88p-5f, 0x1.c85d3ap-3f);
  cos_poly = __clc_mad(x2, cos_poly, -0x1.55a3b4p+0f);
  cos_poly = __clc_mad(x2, cos_poly, 0x1.03c1a6p+2f);
  cos_poly = __clc_mad(x2, cos_poly, -0x1.3bd3ccp+2f);
  __CLC_GENTYPE cos_val = __clc_mad(x2, cos_poly, 1.0f);

  __CLC_SINCOS_RET_GENTYPE result;
  result.sin = sin_val;
  result.cos = cos_val;
  return result;
}
// Evaluate single-precision sin and cos of a value in the interval [-pi/4, pi/4]
_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
private __CLC_FLOATN *sinval,

View File

@ -34,3 +34,19 @@ __clc_sincos_reduced_eval(__CLC_HALFN x) {
ret.sin = __clc_mad(x, t * __clc_mad(t, 0x1.0bp-7h, -0x1.554p-3h), x);
return ret;
}
// Half-precision polynomial evaluation of a sine/cosine pair for the *pi trig
// functions:
//   sin: 0x1.92p+1h * x + x^3 * (linear polynomial in x^2)
//   cos: 1 + x^2 * (linear polynomial in x^2)
// NOTE(review): x is presumably the reduced argument from
// __clc_piArgReductionS -- confirm against callers.
// Returns a struct with the .sin and .cos fields filled in.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_HALFN x) {
  __CLC_HALFN x2 = x * x;

  // Sine: correction term scaled by x^3, added to the leading term.
  __CLC_HALFN sin_poly = __clc_mad(x2, 0x1.b84p+0h, -0x1.46cp+2h);
  __CLC_HALFN sin_tail = x * x2 * sin_poly;
  __CLC_HALFN sin_val = __clc_mad(x, 0x1.92p+1h, sin_tail);

  // Cosine: correction term scaled by x^2, added to 1.
  __CLC_HALFN cos_poly = __clc_mad(x2, 0x1.fbp+1h, -0x1.3bcp+2h);
  __CLC_HALFN cos_val = __clc_mad(x2, cos_poly, 1.0h);

  __CLC_SINCOS_RET_GENTYPE result;
  result.sin = sin_val;
  result.cos = cos_val;
  return result;
}

View File

@ -41,6 +41,29 @@ __clc_sincos_reduced_eval(__CLC_DOUBLEN x, __CLC_DOUBLEN y) {
return ret;
}
_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE
__clc_sincospi_reduced_eval(__CLC_GENTYPE x) {
__CLC_GENTYPE t = x * x;
__CLC_GENTYPE sx = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
__clc_mad(t,
0x1.e357ef99eb0bbp-12, -0x1.e2fe76fdffd2bp-8), 0x1.50782d5f14825p-4), -0x1.32d2ccdfe9424p-1),
0x1.466bc67754fffp+1), -0x1.4abbce625be09p+2);
sx = x * t * sx;
sx = __clc_mad(x, 0x1.921fb54442d18p+1, sx);
__CLC_GENTYPE cx = __clc_mad(t, __clc_mad(t, __clc_mad(t, __clc_mad(t,
__clc_mad(t, __clc_mad(t,
-0x1.b167302e21c33p-14, 0x1.f9c89ca1d4f33p-10), -0x1.a6d1e7294bff9p-6), 0x1.e1f5067b90b37p-3),
-0x1.55d3c7e3c325bp+0), 0x1.03c1f081b5a67p+2), -0x1.3bd3cc9be45dep+2);
cx = __clc_mad(t, cx, 1.0);
__CLC_SINCOS_RET_GENTYPE ret;
ret.cos = cx;
ret.sin = sx;
return ret;
}
_CLC_DEF _CLC_OVERLOAD __CLC_DOUBLEN __clc_tan_reduced_eval(__CLC_DOUBLEN x,
__CLC_DOUBLEN xx,
__CLC_INTN is_odd) {
@ -123,68 +146,6 @@ _CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
*cosval = cp;
}
// Double-precision tangent kernel for an argument given as x plus the small
// correction xx.
// Outputs (note that the parameter names do not describe them):
//   *leadval receives the tangent approximation;
//   *tailval receives its negative reciprocal (-1/tan).
// For |x| > 0.68 the argument is first mapped to pi/4 - |x| and the result is
// recovered with the identities quoted below.
_CLC_DEF _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
private __CLC_DOUBLEN *leadval,
private __CLC_DOUBLEN *tailval) {
// pi/4 split into a leading double and a tail correction.
// 0x3fe921fb54442d18
const __CLC_DOUBLEN piby4_lead = 7.85398163397448278999e-01;
// 0x3c81a62633145c06
const __CLC_DOUBLEN piby4_tail = 3.06161699786838240164e-17;
// In order to maintain relative precision transform using the identity:
// tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
// Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
__CLC_LONGN ca = x > 0.68;
__CLC_LONGN cb = x < -0.68;
// transform is +1 near pi/4, -1 near -pi/4 and 0 otherwise.
__CLC_DOUBLEN transform = ca ? 1.0 : 0.0;
transform = cb ? -1.0 : transform;
// tx = pi/4 - transform * (x + xx), accumulated in lead/tail pieces.
__CLC_DOUBLEN tx = __clc_fma(-transform, x, piby4_lead) +
__clc_fma(-transform, xx, piby4_tail);
__CLC_LONGN c = ca | cb;
x = c ? tx : x;
xx = c ? 0.0 : xx;
// Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
__CLC_DOUBLEN t1 = x;
// r = x*x + 2*x*xx, i.e. (x + xx)^2 without the xx^2 term.
__CLC_DOUBLEN r = __clc_fma(2.0, x * xx, x * x);
// Numerator (a) and denominator (b) of the rational approximation in r.
__CLC_DOUBLEN a = __clc_fma(r,
__clc_fma(r, 0.224044448537022097264602535574e-3,
-0.229345080057565662883358588111e-1),
0.372379159759792203640806338901e0);
__CLC_DOUBLEN b =
__clc_fma(r,
__clc_fma(r,
__clc_fma(r, -0.232371494088563558304549252913e-3,
0.260656620398645407524064091208e-1),
-0.515658515729031149329237816945e0),
0.111713747927937668539901657944e1);
__CLC_DOUBLEN t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
__CLC_DOUBLEN tp = t1 + t2;
// Compute -1.0/(t1 + t2) accurately
// z1 is tp with its low 32 bits cleared; z2 is the remainder so z1 + z2 is
// t1 + t2.
__CLC_DOUBLEN z1 =
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
__CLC_DOUBLEN z2 = t2 - (z1 - t1);
__CLC_DOUBLEN trec = -MATH_RECIP(tp);
__CLC_DOUBLEN trec_top =
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
// Correction step: trec_top + trec * (1 + trec_top * z1 + trec_top * z2).
__CLC_DOUBLEN tpr = __clc_fma(
__clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
// Transformed case: tpt = transform * (1 - tp) / (1 + tp) and
// tptr = transform * (tp + 1) / (tp - 1), which is -1/tpt.
__CLC_DOUBLEN tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
__CLC_DOUBLEN tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
*leadval = c ? tpt : tp;
*tailval = c ? tptr : tpr;
}
// Reduction for small sized arguments
_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_remainder_piby2_small(
__CLC_DOUBLEN x, private __CLC_DOUBLEN *rh, private __CLC_DOUBLEN *rt) {

View File

@ -0,0 +1,22 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clc/clc_convert.h"
#include "clc/float/definitions.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_cos.h"
#include "clc/math/clc_fabs.h"
#include "clc/math/clc_sin.h"
#include "clc/math/clc_sincos_helpers.h"
#include "clc/math/clc_trigpi_helpers.h"
#include "clc/math/math.h"
#include "clc/relational/clc_isinf.h"
#include "clc/relational/clc_select.h"
#define __CLC_BODY "clc_sincospi.inc"
#include "clc/math/gentype.inc"

View File

@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Computes a sine/cosine pair for x in one pass (private-pointer overload).
// The sine is returned and the cosine is stored through cos_out.
//
// Steps:
//   1. +-Inf inputs are replaced by NaN before any further work.
//   2. |x| is reduced by __clc_piArgReductionS to a remainder and an index
//      0..3.
//   3. __clc_sincospi_reduced_eval evaluates both polynomials on the
//      remainder.
//   4. An odd index swaps the roles of the two polynomial results, an index
//      above 1 flips the sign of both outputs, and the sine additionally
//      takes the sign of x.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
__clc_sincospi(__CLC_GENTYPE x, __private __CLC_GENTYPE *cos_out) {
  __CLC_S_GENTYPE x_is_inf = __CLC_CONVERT_S_GENTYPE(__clc_isinf(x));
  x = __clc_select(x, __CLC_GENTYPE_NAN, x_is_inf);

  __CLC_GENTYPE magnitude = __clc_fabs(x);
  __CLC_GENTYPE remainder;
  __CLC_INTN quadrant = __clc_piArgReductionS(&remainder, magnitude);
  __CLC_SINCOS_RET_GENTYPE sc = __clc_sincospi_reduced_eval(remainder);

  // Sign-bit mask applied to both outputs for index 2 and 3.
  __CLC_S_GENTYPE upper_half = __CLC_CONVERT_S_GENTYPE(quadrant > 1);
  __CLC_S_GENTYPE sign_flip =
      upper_half ? __CLC_GENTYPE_S_SIGNBIT : (__CLC_S_GENTYPE)0;
  __CLC_S_GENTYPE swap = __CLC_CONVERT_S_GENTYPE((quadrant & 1) != 0);

  // |x| and x differ only in the sign bit, so the XOR isolates the sign of x.
  __CLC_S_GENTYPE x_sign =
      __CLC_AS_S_GENTYPE(magnitude) ^ __CLC_AS_S_GENTYPE(x);

  __CLC_GENTYPE sin_base = swap ? sc.cos : sc.sin;
  __CLC_S_GENTYPE sin_bits = __CLC_AS_S_GENTYPE(sin_base) ^ sign_flip ^ x_sign;

  __CLC_GENTYPE cos_base = swap ? -sc.sin : sc.cos;
  __CLC_S_GENTYPE cos_bits = __CLC_AS_S_GENTYPE(cos_base) ^ sign_flip;

  *cos_out = __CLC_AS_GENTYPE(cos_bits);
  return __CLC_AS_GENTYPE(sin_bits);
}
// Generates a __clc_sincospi overload whose cosine output pointer lives in the
// given address space. Each overload calls the private-pointer version with a
// local temporary and copies the cosine out afterwards.
#define __CLC_SINCOSPI_DEF(addrspace)                                          \
  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sincospi(                         \
      __CLC_GENTYPE x, addrspace __CLC_GENTYPE *cos_out) {                     \
    __CLC_GENTYPE private_cos;                                                 \
    __CLC_GENTYPE sin_value = __clc_sincospi(x, &private_cos);                 \
    *cos_out = private_cos;                                                    \
    return sin_value;                                                          \
  }

__CLC_SINCOSPI_DEF(local)
__CLC_SINCOSPI_DEF(global)

// The generic address space gets its own overload only when it is distinct
// from the others.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
__CLC_SINCOSPI_DEF(generic)
#endif

View File

@ -6,12 +6,8 @@
//
//===----------------------------------------------------------------------===//
#include "clc/clc_convert.h"
#include "clc/float/definitions.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_fabs.h"
#include "clc/math/clc_sincos_helpers.h"
#include "clc/math/math.h"
#include "clc/math/clc_sincospi.h"
#include "clc/math/clc_sinpi.h"
#define __CLC_BODY "clc_sinpi.inc"
#include "clc/math/gentype.inc"

View File

@ -6,109 +6,7 @@
//
//===----------------------------------------------------------------------===//
#if __CLC_FPSIZE == 32
// Single-precision sinpi. The result is assembled as a bit pattern in `ir`
// through a cascade of selects, each one overriding the previous for a smaller
// range of |x|.
// NOTE(review): as written, the two statements after
// `return __CLC_AS_GENTYPE(ir);` are unreachable. They look like the
// replacement body (delegating to __clc_sincospi) from a diff rendered without
// +/- markers -- confirm which body is intended.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
__CLC_INTN ix = __CLC_AS_INTN(x);
// Split off the sign bit; ix then holds the bits of |x|.
__CLC_INTN xsgn = ix & (__CLC_INTN)0x80000000;
ix ^= xsgn;
__CLC_GENTYPE absx = __clc_fabs(x);
// Integer part of |x| and the remaining fraction r = |x| - trunc(|x|).
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
// Result sign: the sign of x, flipped when the integer part is odd.
__CLC_INTN xodd =
xsgn ^ ((iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0);
// Initialize with return for +-Inf and NaN
__CLC_INTN ir = QNANBITPATT_SP32;
// 2^23 <= |x| < Inf: x is always an integer, so the result is a zero carrying
// the sign of x
ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
// |x| < 2^23, result depends on which 0.25 interval r falls in.
// `a` is the argument handed to the kernel and `e` selects cos (non-zero) or
// sin (zero) of it.
// r < 1.0
__CLC_GENTYPE a = 1.0f - r;
__CLC_INTN e = 0;
// r <= 0.75
__CLC_INTN c = r <= 0.75f;
a = c ? r - 0.5f : a;
e = c ? 1 : e;
// r < 0.5
c = r < 0.5f;
a = c ? 0.5f - r : a;
// 0 < r <= 0.25
c = r <= 0.25f;
a = c ? r : a;
e = c ? 0 : e;
__CLC_GENTYPE sinval, cosval;
__clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
__CLC_INTN jr = xodd ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
ir = ix < 0x4b000000 ? jr : ir;
return __CLC_AS_GENTYPE(ir);
// Unreachable as written (see the note above the function).
__CLC_GENTYPE unused_cos;
return __clc_sincospi(x, &unused_cos);
}
#elif __CLC_FPSIZE == 64
// Double-precision sinpi. The result is assembled as a bit pattern in `ir`
// through a cascade of selects, each one overriding the previous for a smaller
// range of |x|.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
__CLC_LONGN ix = __CLC_AS_LONGN(x);
// Split off the sign bit; ix then holds the bits of |x|.
__CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L;
ix ^= xsgn;
__CLC_GENTYPE absx = __clc_fabs(x);
// Integer part of |x| and the remaining fraction r = |x| - trunc(|x|).
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
// Result sign: the sign of x, flipped when the integer part is odd.
__CLC_LONGN xodd =
xsgn ^
((iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L);
// Initialize with return for +-Inf and NaN
__CLC_LONGN ir = QNANBITPATT_DP64;
// 2^52 <= |x| < Inf: x is always an integer, so the result is a zero carrying
// the sign of x
ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
// |x| < 2^52, result depends on which 0.25 interval r falls in.
// `a` is the argument handed to the kernel and `e` selects cos (non-zero) or
// sin (zero) of it.
// r < 1.0
__CLC_GENTYPE a = 1.0 - r;
__CLC_LONGN e = 0;
// r <= 0.75
__CLC_LONGN c = r <= 0.75;
__CLC_GENTYPE t = r - 0.5;
a = c ? t : a;
e = c ? 1 : e;
// r < 0.5
c = r < 0.5;
t = 0.5 - r;
a = c ? t : a;
// r <= 0.25
c = r <= 0.25;
a = c ? r : a;
e = c ? 0 : e;
__CLC_GENTYPE api = a * M_PI;
__CLC_GENTYPE sinval, cosval;
// The second argument (0.0) is the `xx` parameter of the fp64 kernel.
__clc_sincos_piby4(api, 0.0, &sinval, &cosval);
__CLC_LONGN jr = xodd ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
ir = absx < 0x1.0p+52 ? jr : ir;
return __CLC_AS_GENTYPE(ir);
}
#elif __CLC_FPSIZE == 16
// Half-precision sinpi: converts x to the float type of the same width, calls
// the float overload of __clc_sinpi, and converts the result back.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
return __CLC_CONVERT_GENTYPE(__clc_sinpi(__CLC_CONVERT_FLOATN(x)));
}
#endif

View File

@ -10,9 +10,13 @@
#include "clc/float/definitions.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_fabs.h"
#include "clc/math/clc_mad.h"
#include "clc/math/clc_native_recip.h"
#include "clc/math/clc_sincos_helpers.h"
#include "clc/math/clc_trigpi_helpers.h"
#include "clc/math/math.h"
#include "clc/relational/clc_isinf.h"
#include "clc/relational/clc_select.h"
#define __CLC_BODY "clc_tanpi.inc"
#include "clc/math/gentype.inc"

View File

@ -8,125 +8,82 @@
#if __CLC_FPSIZE == 32
// NOTE(review): this region is not valid C as written. A second function
// signature (__clc_tanpi_reduced_eval) opens inside the body of __clc_tanpi,
// only one closing brace follows, and `s`, `t` and `tr` are each declared
// twice. It looks like the removed single-precision __clc_tanpi body and the
// added __clc_tanpi_reduced_eval body of a diff, rendered interleaved without
// +/- markers. Read the two line sets separately:
//   - lines working on ix/xsgn/xodd/ir/a/e/c/jr: the bit-pattern select
//     cascade of __clc_tanpi;
//   - lines working on s = x * x, the __clc_mad polynomial, t, tr and is_odd:
//     __clc_tanpi_reduced_eval, which returns tr = -1/t when is_odd is set
//     and t otherwise.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
__CLC_INTN ix = __CLC_AS_INTN(x);
__CLC_INTN xsgn = ix & (__CLC_INTN)SIGNBIT_SP32;
__CLC_INTN xnsgn = xsgn ^ (__CLC_INTN)SIGNBIT_SP32;
ix ^= xsgn;
__CLC_GENTYPE absx = __clc_fabs(x);
__CLC_INTN iax = __CLC_CONVERT_INTN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
__CLC_INTN xodd = xsgn ^ __CLC_AS_INTN((iax & 0x1) != 0 ? SIGNBIT_SP32 : 0);
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE
__clc_tanpi_reduced_eval(__CLC_GENTYPE x, __CLC_INTN is_odd) {
__CLC_GENTYPE s = x * x;
// Initialize with return for +-Inf and NaN
__CLC_INTN ir = QNANBITPATT_SP32;
__CLC_GENTYPE t = __clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
__clc_mad(s, __clc_mad(s,
0x1.7d2bd4p+16f, 0x1.a4d306p+12f), 0x1.435004p+11f), 0x1.4b6926p+9f),
0x1.451e22p+7f), 0x1.467a9cp+5f), 0x1.4abb6ap+3f);
// 2^24 <= |x| < Inf, the result is always even integer
ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
t = x * s * t;
t = __clc_mad(x, 0x1.921fb6p+1f, t);
// 2^23 <= |x| < 2^24, the result is always integer
ir = ix < 0x4b800000 ? xodd : ir;
__CLC_GENTYPE tr = __CLC_FP_LIT(-1.0) / t;
// 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
// r < 1.0
__CLC_GENTYPE a = 1.0f - r;
__CLC_INTN e = 0;
__CLC_INTN s = xnsgn;
// r <= 0.75
__CLC_INTN c = r <= 0.75f;
a = c ? r - 0.5f : a;
e = c ? 1 : e;
s = c ? xsgn : s;
// r < 0.5
c = r < 0.5f;
a = c ? 0.5f - r : a;
s = c ? xnsgn : s;
// 0 < r <= 0.25
c = r <= 0.25f;
a = c ? r : a;
e = c ? 0 : e;
s = c ? xsgn : s;
__CLC_GENTYPE t = __clc_tanf_piby4(a * M_PI_F, 0);
__CLC_GENTYPE tr = -__clc_native_recip(t);
__CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? tr : t);
jr = r == 0.5f ? xodd | 0x7f800000 : jr;
ir = ix < 0x4b000000 ? jr : ir;
return __CLC_AS_GENTYPE(ir);
return is_odd ? tr : t;
}
#elif __CLC_FPSIZE == 64
// NOTE(review): this region is not valid C as written. A second function
// signature (__clc_tanpi_reduced_eval) opens inside the body of __clc_tanpi,
// only one closing brace follows, and `s` and `t` are each declared twice. It
// looks like the removed double-precision __clc_tanpi body and the added
// __clc_tanpi_reduced_eval body of a diff, rendered interleaved without +/-
// markers. Read the two line sets separately:
//   - lines working on ix/xsgn/xodd/ir/a/e/c/jr/si: the bit-pattern select
//     cascade of __clc_tanpi, which calls __clc_tan_piby4;
//   - lines working on s = x * x, the __clc_mad polynomial, t, tr and is_odd:
//     __clc_tanpi_reduced_eval, which returns tr = -1/t when is_odd is set
//     and t otherwise.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
__CLC_LONGN ix = __CLC_AS_LONGN(x);
__CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L;
__CLC_LONGN xnsgn = xsgn ^ (__CLC_LONGN)0x8000000000000000L;
ix ^= xsgn;
__CLC_GENTYPE absx = __clc_fabs(x);
__CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
__CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
__CLC_LONGN xodd =
xsgn ^ __CLC_AS_LONGN((iax & 0x1) != 0 ? 0x8000000000000000L : 0L);
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN
__clc_tanpi_reduced_eval(__CLC_DOUBLEN x, __CLC_INTN is_odd) {
__CLC_GENTYPE s = x * x;
__CLC_GENTYPE t = __clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
__clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
__clc_mad(s, __clc_mad(s, __clc_mad(s, __clc_mad(s,
__clc_mad(s,
0x1.3fad0a71ea6d1p+32, -0x1.11a76ac97377bp+30), 0x1.ba2bcaca6da1bp+27), -0x1.79e8e2d7aaf57p+22),
0x1.c1c1102e46eccp+21), 0x1.31291bbcb5588p+19), 0x1.486b2d6bb3db2p+17), 0x1.45be1b46ff156p+15),
0x1.45f61b419c746p+13), 0x1.45f311045a4ffp+11), 0x1.45f4739a998c7p+9), 0x1.45fff9b243050p+7),
0x1.466bc6775cf74p+5), 0x1.4abbce625be8bp+3);
t = x * s * t;
t = __clc_mad(x, 0x1.921fb54442d18p+1, t);
// Initialize with return for +-Inf and NaN
__CLC_LONGN ir = QNANBITPATT_DP64;
__CLC_GENTYPE tr = __CLC_FP_LIT(-1.0) / t;
// 2^53 <= |x| < Inf, the result is always even integer
ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
// 2^52 <= |x| < 2^53, the result is always integer
ir = ix < 0x4340000000000000L ? xodd : ir;
// 0x1.0p-14 <= |x| < 2^53, result depends on which 0.25 interval
// r < 1.0
__CLC_GENTYPE a = 1.0 - r;
__CLC_LONGN e = 0;
__CLC_LONGN s = xnsgn;
// r <= 0.75
__CLC_LONGN c = r <= 0.75;
__CLC_GENTYPE t = r - 0.5;
a = c ? t : a;
e = c ? 1 : e;
s = c ? xsgn : s;
// r < 0.5
c = r < 0.5;
t = 0.5 - r;
a = c ? t : a;
s = c ? xnsgn : s;
// r <= 0.25
c = r <= 0.25;
a = c ? r : a;
e = c ? 0 : e;
s = c ? xsgn : s;
__CLC_GENTYPE api = a * M_PI;
__CLC_GENTYPE lo, hi;
__clc_tan_piby4(api, 0.0, &lo, &hi);
__CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? hi : lo);
__CLC_LONGN si = xodd | 0x7ff0000000000000L;
jr = r == 0.5 ? si : jr;
ir = ix < 0x4330000000000000L ? jr : ir;
return __CLC_AS_GENTYPE(ir);
return __CLC_CONVERT_LONGN(is_odd) ? tr : t;
}
#elif __CLC_FPSIZE == 16
// NOTE(review): this region is not valid C as written. A second function
// signature (__clc_tanpi_reduced_eval) opens inside the body of __clc_tanpi
// and only one closing brace follows. It looks like the removed half-precision
// __clc_tanpi wrapper (which forwarded to the float overload) and the added
// __clc_tanpi_reduced_eval body of a diff, rendered interleaved without +/-
// markers. The reduced-eval lines compute an odd polynomial t in x and return
// tr = -1/t when is_odd is set, t otherwise.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
return __CLC_CONVERT_GENTYPE(__clc_tanpi(__CLC_CONVERT_FLOATN(x)));
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN
__clc_tanpi_reduced_eval(__CLC_HALFN x, __CLC_INTN is_odd) {
__CLC_HALFN s = x * x;
__CLC_HALFN t =
__clc_mad(s, __clc_mad(s, 0x1.3d8p+8h, 0x1.fe4p+4h), 0x1.508p+3h);
t = x * s * t;
t = __clc_mad(x, 0x1.92p+1h, t);
__CLC_HALFN tr = __CLC_FP_LIT(-1.0) / t;
return __CLC_CONVERT_SHORTN(is_odd) ? tr : t;
}
#endif
// Type-generic tanpi built on the shared argument reduction.
//
// Steps:
//   1. +-Inf inputs are replaced by NaN before any further work.
//   2. |x| is reduced by __clc_piArgReductionS to a remainder and an index
//      0..3.
//   3. __clc_tanpi_reduced_eval evaluates the remainder; an odd index asks for
//      the negative-reciprocal form.
//   4. When the remainder is exactly zero and the index is 1 or 2, the sign of
//      the evaluated value is flipped; finally the sign bit of x is XORed in.
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
  __CLC_S_GENTYPE x_is_inf = __CLC_CONVERT_S_GENTYPE(__clc_isinf(x));
  x = __clc_select(x, __CLC_GENTYPE_NAN, x_is_inf);

  __CLC_GENTYPE remainder;
  __CLC_INTN quadrant = __clc_piArgReductionS(&remainder, __clc_fabs(x));
  __CLC_GENTYPE tan_base =
      __clc_tanpi_reduced_eval(remainder, (quadrant & 1) != 0);

  __CLC_S_GENTYPE middle_quadrant =
      __CLC_CONVERT_S_GENTYPE((quadrant == 1) || (quadrant == 2));
  __CLC_S_GENTYPE sign_flip =
      (middle_quadrant && (remainder == __CLC_FP_LIT(0.0)))
          ? __CLC_GENTYPE_S_SIGNBIT
          : (__CLC_S_GENTYPE)0;

  __CLC_S_GENTYPE x_sign = __CLC_AS_S_GENTYPE(x) & __CLC_GENTYPE_S_SIGNBIT;
  __CLC_S_GENTYPE bits = (__CLC_AS_S_GENTYPE(tan_base) ^ sign_flip) ^ x_sign;
  return __CLC_AS_GENTYPE(bits);
}

View File

@ -0,0 +1,16 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clc/clc_convert.h"
#include "clc/math/clc_fract.h"
#include "clc/math/clc_mad.h"
#include "clc/math/clc_rint.h"
#include "clc/math/clc_trigpi_helpers.h"
#define __CLC_BODY "clc_trigpi_helpers.inc"
#include "clc/math/gentype.inc"

View File

@ -0,0 +1,19 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Argument reduction shared by the *pi trig functions.
//
// Inputs above 1 are first folded to 2 * fract(x / 2). The folded value is
// then split at the nearest multiple of 0.5:
//   *reduced = value - 0.5 * rint(2 * value)
//   return   = rint(2 * value) & 3
// NOTE(review): the callers visible in this change pass |x|; behaviour for
// negative x is not exercised here.
_CLC_DEF _CLC_OVERLOAD __CLC_INTN
__clc_piArgReductionS(private __CLC_GENTYPE *reduced, __CLC_GENTYPE x) {
  // __clc_fract needs somewhere to store its second result; it is not used.
  __CLC_GENTYPE ignored;
  __CLC_GENTYPE folded =
      __CLC_FP_LIT(2.0) * __clc_fract(__CLC_FP_LIT(0.5) * x, &ignored);
  __CLC_GENTYPE value = x > __CLC_FP_LIT(1.0) ? folded : x;

  __CLC_GENTYPE half_steps = __clc_rint(__CLC_FP_LIT(2.0) * value);
  *reduced = __clc_mad(half_steps, __CLC_FP_LIT(-0.5), value);
  return __CLC_CONVERT_INTN(half_steps) & 0x3;
}