libclc: Update acosh (#188224)

libclc: Update acosh

This was originally ported from rocm device libs
in ca4d382e119e1389c83dbb07d9ca0085e88b2944. Merge in
more recent changes.

Remove unused ep_log.
This commit is contained in:
Matt Arsenault 2026-03-24 13:07:00 +01:00 committed by GitHub
parent 2857f3bb6a
commit 8b224162fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 28 additions and 235 deletions

View File

@ -1,15 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLC_MATH_CLC_EP_LOG_H__
#define __CLC_MATH_CLC_EP_LOG_H__
#define __CLC_BODY "clc/math/clc_ep_log_decl.inc"
#include "clc/math/gentype.inc"
#endif // __CLC_MATH_CLC_EP_LOG_H__

View File

@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#if __CLC_FPSIZE == 64
// Declaration of the extra-precision natural-log helper; it is only provided
// for the 64-bit floating-point gentypes (see the __CLC_FPSIZE guard).
//
//   x    - input value.
//   xexp - out: integer exponent term, written through a private pointer.
//   r1   - out: first part of the result.
//   r2   - out: second part of the result.
//
// The function returns nothing; all results are delivered through the three
// private-address-space output pointers.
// NOTE(review): callers are presumably expected to combine the outputs as
// xexp * ln(2) + r1 + r2 -- confirm against the definition and call sites.
_CLC_DECL _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x,
private __CLC_INTN *xexp,
private __CLC_GENTYPE *r1,
private __CLC_GENTYPE *r2);
#endif

View File

@ -77,7 +77,6 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
math/clc_div_cr.cl
math/clc_div_fast.cl
math/clc_ep.cl
math/clc_ep_log.cl
math/clc_erf.cl
math/clc_erfc.cl
math/clc_exp.cl

View File

@ -5,20 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clc/clc_convert.h"
#include "clc/float/definitions.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_ep_log.h"
#include "clc/math/clc_fabs.h"
#include "clc/math/clc_fma.h"
#include "clc/math/clc_log1p.h"
#include "clc/math/clc_acosh.h"
#include "clc/math/clc_ep.h"
#include "clc/math/clc_log2_fast.h"
#include "clc/math/clc_mad.h"
#include "clc/math/clc_sqrt.h"
#include "clc/math/math.h"
#include "clc/relational/clc_isinf.h"
#include "clc/relational/clc_isnan.h"
#include "clc/relational/clc_select.h"
#include "clc/math/clc_sqrt_fast.h"
#define __CLC_BODY "clc_acosh.inc"
#include "clc/math/gentype.inc"

View File

@ -8,104 +8,40 @@
#if __CLC_FPSIZE == 32
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
__CLC_UINTN ux = __CLC_AS_UINTN(x);
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_acosh(__CLC_FLOATN x) {
__CLC_INTN b = x >= 0x1.0p+64f;
__CLC_FLOATN s = b ? 0x1.0p-64f : 1.0f;
__CLC_FLOATN sx = x * s;
__CLC_EP_PAIR a =
__clc_ep_add(sx, __clc_ep_sqrt(__clc_ep_sub(__clc_ep_sqr(sx), s * s)));
__CLC_FLOATN z = __clc_ep_ln_hi(a, b ? 64 : 0);
// Arguments greater than 1/sqrt(epsilon) in magnitude are approximated by
// acosh(x) = ln(2) + ln(x)
// For 2.0 <= x <= 1/sqrt(epsilon) the approximation is:
// acosh(x) = ln(x + sqrt(x * x - 1))
__CLC_INTN high = ux > 0x46000000U;
__CLC_INTN med = ux > 0x40000000U;
__CLC_GENTYPE w = x - 1.0f;
__CLC_GENTYPE s = w * w + 2.0f * w;
__CLC_GENTYPE t = x * x - 1.0f;
__CLC_GENTYPE r = __clc_sqrt(med ? t : s) + (med ? x : w);
__CLC_GENTYPE v = (high ? x : r) - (med ? 1.0f : 0.0f);
__CLC_GENTYPE z = __clc_log1p(v) + (high ? 0x1.62e430p-1f : 0.0f);
z = __clc_select(z, x, __clc_isnan(x) || __clc_isinf(x));
z = x < 1.0f ? __CLC_GENTYPE_NAN : z;
return z;
z = x == __CLC_GENTYPE_INF ? x : z;
return x < 1.0f ? FLT_NAN : z;
}
#elif __CLC_FPSIZE == 64
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
// 1/sqrt(eps) = 9.49062656242515593767e+07
const __CLC_GENTYPE recrteps = 0x1.6a09e667f3bcdp+26;
// log2_lead and log2_tail sum to an extra-precise version of log(2)
const __CLC_GENTYPE log2_lead = 0x1.62e42ep-1;
const __CLC_GENTYPE log2_tail = 0x1.efa39ef35793cp-25;
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN __clc_acosh(__CLC_DOUBLEN x) {
__CLC_LONGN b = x >= 0x1.0p+512;
__CLC_DOUBLEN s = b ? 0x1.0p-512 : 1.0;
__CLC_DOUBLEN sx = x * s;
__CLC_EP_PAIR a =
__clc_ep_add(sx, __clc_ep_sqrt(__clc_ep_sub(__clc_ep_sqr(sx), s * s)));
__CLC_DOUBLEN z = __clc_ep_ln_hi(a, __CLC_CONVERT_INTN(b) ? 512 : 0);
// Handle x >= 128 here
__CLC_LONGN xlarge = x > recrteps;
__CLC_GENTYPE r = x + __clc_sqrt(__clc_fma(x, x, -1.0));
r = xlarge ? x : r;
__CLC_INTN xexp;
__CLC_GENTYPE r1, r2;
__clc_ep_log(r, &xexp, &r1, &r2);
__CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE(
__CLC_CONVERT_LONGN(xexp) + (xlarge ? (__CLC_LONGN)1 : (__CLC_LONGN)0));
r1 = __clc_fma(dxexp, log2_lead, r1);
r2 = __clc_fma(dxexp, log2_tail, r2);
__CLC_GENTYPE ret1 = r1 + r2;
// Handle 1 < x < 128 here
// We compute the value
// t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0))
// using simulated quad precision.
__CLC_GENTYPE t = x - 1.0;
__CLC_GENTYPE u1 = t * 2.0;
// (t,0) * (t,0) -> (v1, v2)
__CLC_GENTYPE v1 = t * t;
__CLC_GENTYPE v2 = __clc_fma(t, t, -v1);
// (u1,0) + (v1,v2) -> (w1,w2)
r = u1 + v1;
__CLC_GENTYPE s = (((u1 - r) + v1) + v2);
__CLC_GENTYPE w1 = r + s;
__CLC_GENTYPE w2 = (r - w1) + s;
// sqrt(w1,w2) -> (u1,u2)
__CLC_GENTYPE p1 = __clc_sqrt(w1);
__CLC_GENTYPE a1 = p1 * p1;
__CLC_GENTYPE a2 = __clc_fma(p1, p1, -a1);
__CLC_GENTYPE temp = (((w1 - a1) - a2) + w2);
__CLC_GENTYPE p2 = MATH_DIVIDE(temp * 0.5, p1);
u1 = p1 + p2;
__CLC_GENTYPE u2 = (p1 - u1) + p2;
// (u1,u2) + (t,0) -> (r1,r2)
r = u1 + t;
s = ((u1 - r) + t) + u2;
// r1 = r + s;
// r2 = (r - r1) + s;
// t = r1 + r2;
t = r + s;
// For arguments 1.13 <= x <= 1.5 the log1p function is good enough
__CLC_GENTYPE ret2 = __clc_log1p(t);
__CLC_GENTYPE ret = x >= 128.0 ? ret1 : ret2;
ret = (__clc_isinf(x) || __clc_isnan(x)) ? x : ret;
ret = x == 1.0 ? 0.0 : ret;
ret = x < 1.0 ? __CLC_GENTYPE_NAN : ret;
return ret;
z = x == __CLC_GENTYPE_INF ? x : z;
return x < 1.0 ? DBL_NAN : z;
}
#elif __CLC_FPSIZE == 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
return __CLC_CONVERT_GENTYPE(__clc_acosh(__CLC_CONVERT_FLOATN(x)));
_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN __clc_acosh(__CLC_HALFN hx) {
__CLC_FLOATN x = __CLC_CONVERT_FLOATN(hx);
__CLC_FLOATN t = x + __clc_sqrt_fast(__clc_mad(x, x, -1.0f));
__CLC_HALFN ret = __CLC_CONVERT_HALFN(__clc_log2_fast(t) * 0x1.62e430p-1f);
return hx < 1.0h ? HALF_NAN : ret;
}
#endif

View File

@ -1,23 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifdef cl_khr_fp64
#include "clc/clc_convert.h"
#include "clc/internal/clc.h"
#include "clc/math/clc_ep_log.h"
#include "clc/math/clc_fma.h"
#include "clc/math/math.h"
#include "clc/math/tables.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define __CLC_BODY "clc_ep_log.inc"
#include "clc/math/gentype.inc"
#endif

View File

@ -1,81 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Computes natural log(x). Algorithm based on:
// Ping-Tak Peter Tang
// "Table-driven implementation of the logarithm function in IEEE floating-point
// arithmetic"
// ACM Transactions on Mathematical Software (TOMS) Volume 16, Issue 4 (December
// 1990)
//
//===----------------------------------------------------------------------===//
#if __CLC_FPSIZE == 64
// Coefficients of the polynomial in v = u * u that is multiplied by u * v
// below. LN0..LN3 are used on the near-one path and LF0..LF2 on the
// table-driven path. The values are close to 1/12, 1/80, 1/448 and 1/2304.
#define LN0 8.33333333333317923934e-02
#define LN1 1.25000000037717509602e-02
#define LN2 2.23213998791944806202e-03
#define LN3 4.34887777707614552256e-04
#define LF0 8.33333333333333593622e-02
#define LF1 1.24999999978138668903e-02
#define LF2 2.23219810758559851206e-03
// Extra-precision natural-log helper for the 64-bit gentypes.
//
// Rather than returning log(x), the function hands back three pieces through
// private pointers:
//   *xexp - exponent term; forced to 0 when x is close to 1.0.
//   *r1   - leading part: x - 1 when x is close to 1.0, otherwise the
//           ln_tbl_lo entry selected by the leading mantissa bits.
//   *r2   - trailing part: u + u^3 * P(u^2) style correction, plus the
//           ln_tbl_hi entry on the table-driven path.
// NOTE(review): callers presumably form xexp * ln(2) + r1 + r2 themselves
// (possibly with a split ln(2) constant) -- confirm against the call sites.
// NOTE(review): no handling of zero, negative, infinite or NaN inputs is
// visible here; presumably callers filter those -- confirm.
_CLC_DEF _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x,
private __CLC_INTN *xexp,
private __CLC_GENTYPE *r1,
private __CLC_GENTYPE *r2) {
// Inputs in roughly [0.939, 1.064] take the table-free path below.
__CLC_LONGN near_one = x >= 0x1.e0faap-1 && x <= 0x1.1082cp+0;
// Raw bit pattern of x.
__CLC_ULONGN ux = __CLC_AS_ULONGN(x);
// For an input whose exponent field is zero, OR-ing in exponent field 0x3d
// (value 2^-962) and then subtracting 2^-962 leaves the input scaled by 2^60,
// i.e. renormalized. The result is only used when c is set.
__CLC_ULONGN uxs =
__CLC_AS_ULONGN(__CLC_AS_GENTYPE(0x03d0000000000000UL | ux) - 0x1.0p-962);
// NOTE(review): IMPBIT_DP64 is defined elsewhere; presumably the implicit-bit
// mask, so c flags inputs with a zero exponent field -- confirm.
__CLC_LONGN c = ux < IMPBIT_DP64;
ux = c ? uxs : ux;
// Compensates the exponent for the 2^60 rescale applied above.
__CLC_INTN expadjust =
__CLC_CONVERT_INTN(c ? (__CLC_LONGN)60 : (__CLC_LONGN)0);
// Store the exponent of x in xexp and put f into the range [0.5,1)
__CLC_INTN xexp1 = __CLC_CONVERT_INTN((ux >> EXPSHIFTBITS_DP64) & 0x7ff) -
EXPBIAS_DP64 - expadjust;
__CLC_GENTYPE f = __CLC_AS_GENTYPE(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64));
*xexp = __CLC_CONVERT_INTN(near_one) ? 0 : xexp1;
// Near-one path: with r = x - 1, u1 ends up as 2r / (2 + r) and
// ru1 = -r^2 / (2 + r), which equals u1 - r, so r + ru1 reconstructs u1.
__CLC_GENTYPE r = x - 1.0;
__CLC_GENTYPE u1 = MATH_DIVIDE(r, 2.0 + r);
__CLC_GENTYPE ru1 = -r * u1;
u1 = u1 + u1;
// Table-driven path: take the bits of ux above bit 45 (the low 7 of which are
// the leading mantissa bits), keep 6 mantissa bits with the 7th used for
// rounding, giving index in [64, 128] and f1 = index / 128 as the table point
// nearest to f.
__CLC_INTN index = __CLC_CONVERT_INTN(ux >> 45);
index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1);
__CLC_GENTYPE f1 = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-7;
// f2 is the residual of f relative to f1; u2 = f2 / (f1 + f2 / 2).
__CLC_GENTYPE f2 = f - f1;
__CLC_GENTYPE u2 = MATH_DIVIDE(f2, __clc_fma(0.5, f2, f1));
// Table entries for the chosen f1 (offset index - 64 is in [0, 64]).
// NOTE(review): the exact contents of ln_tbl_lo / ln_tbl_hi are defined
// elsewhere -- confirm which holds the leading and which the trailing part.
__CLC_GENTYPE z1 = __CLC_USE_TABLE(ln_tbl_lo, (index - 64));
__CLC_GENTYPE q = __CLC_USE_TABLE(ln_tbl_hi, (index - 64));
// Per-lane selection between the near-one values and the table-driven values.
z1 = near_one ? r : z1;
q = near_one ? 0.0 : q;
__CLC_GENTYPE u = near_one ? u1 : u2;
__CLC_GENTYPE v = u * u;
// Additive term for the final fma: u1 - r near one, u2 otherwise.
__CLC_GENTYPE cc = near_one ? ru1 : u2;
// Both polynomials are evaluated in Horner form; the select picks one.
__CLC_GENTYPE z21 =
__clc_fma(v, __clc_fma(v, __clc_fma(v, LN3, LN2), LN1), LN0);
__CLC_GENTYPE z22 = __clc_fma(v, __clc_fma(v, LF2, LF1), LF0);
__CLC_GENTYPE z2 = near_one ? z21 : z22;
// z2 = u^3 * P(u^2) + cc, plus the second table entry (0 on the near-one path).
z2 = __clc_fma(u * v, z2, cc) + q;
*r1 = z1;
*r2 = z2;
}
#endif