libclc: do not use int64 in sincos helpers (#188056)

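64-bit integer support is optional in OpenCL C (it is advertised through the
__opencl_c_int64 feature macro), so the sincos helpers must not require it
when libclc is built for clspv.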
2026-03-27 11:36:31 +01:00 · 2026-03-27 11:36:31 +01:00 · e0a1e78738
commit e0a1e78738
parent b164e7c610
3 changed files with 26 additions and 1 deletions
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@ -190,7 +190,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
    list(APPEND target_extra_defines CLC_SPIRV)
    set(opt_flags)
  elseif(ARCH STREQUAL clspv OR ARCH STREQUAL clspv64)
-    list(APPEND target_compile_flags -Wno-unknown-assumption)
+    list(APPEND target_compile_flags -Wno-unknown-assumption
+                                     -U__opencl_c_int64)
    list(APPEND target_extra_defines CLC_CLSPV)
  elseif(ARCH STREQUAL amdgcn)
    list(APPEND target_compile_flags "SHELL:-Xclang -mcode-object-version=none")
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@ -22,6 +22,18 @@
 #include "clc/relational/clc_isinf.h"
 #include "clc/relational/clc_isnan.h"

+#ifndef __opencl_c_int64
+#include "clc/integer/clc_mul_hi.h"
+// Full product A * B as a (HI, LO) pair, avoiding any 64-bit integer type.
+#define __CLC_FULL_MUL(A, B, HI, LO)                                           \
+  LO = (A) * (B);                                                              \
+  HI = __clc_mul_hi(A, B)
+// (HI, LO) = A * B + C; LO ending up below C signals a carry into HI.
+#define __CLC_FULL_MAD(A, B, C, HI, LO)                                        \
+  LO = ((A) * (B) + (C));                                                      \
+  HI = __clc_mul_hi(A, B) + ((LO) < (C) ? 1U : 0U)
+#endif
+
 #define bitalign(hi, lo, shift) __builtin_elementwise_fshr(hi, lo, shift)

 #define __CLC_FLOAT_ONLY
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@ -184,6 +184,17 @@ __clc_argReductionLargeS(private __CLC_FLOATN *r, __CLC_FLOATN x) {
  const __CLC_UINTN b0 = 0xFE5163ABU;

  __CLC_UINTN p0, p1, p2, p3, p4, p5, p6, p7;
+#ifndef __opencl_c_int64
+  __CLC_UINTN c0, c1;
+
+  __CLC_FULL_MUL(xm, b0, c0, p0);
+  __CLC_FULL_MAD(xm, b1, c0, c1, p1);
+  __CLC_FULL_MAD(xm, b2, c1, c0, p2);
+  __CLC_FULL_MAD(xm, b3, c0, c1, p3);
+  __CLC_FULL_MAD(xm, b4, c1, c0, p4);
+  __CLC_FULL_MAD(xm, b5, c0, c1, p5);
+  __CLC_FULL_MAD(xm, b6, c1, p7, p6);
+#else
  __CLC_ULONGN a;

  __CLC_ULONGN xm_u64 = __CLC_CONVERT_ULONGN(xm);
@ -215,6 +226,7 @@ __clc_argReductionLargeS(private __CLC_FLOATN *r, __CLC_FLOATN x) {
  a = xm_u64 * __CLC_CONVERT_ULONGN(b6) + a;
  p6 = __CLC_CONVERT_UINTN(a);
  p7 = __CLC_CONVERT_UINTN(a >> 32);
+#endif

  __CLC_UINTN fbits =
      (__CLC_UINTN)224 + (__CLC_UINTN)23 - __CLC_CONVERT_UINTN(xe);