libclc: Use special division for atan2 for DAZ (#190248)

The AMDGPU DAZ fdiv works fine in this case, so there may be something better we could do here.
2026-04-02 22:18:17 +02:00 · 2026-04-02 22:18:17 +02:00 · eed1f2749d
commit eed1f2749d
parent 45ac2db4e5
2 changed files with 9 additions and 1 deletions
--- a/libclc/clc/lib/generic/math/clc_atan2.cl
+++ b/libclc/clc/lib/generic/math/clc_atan2.cl
@@ -10,12 +10,14 @@
 #include "clc/internal/clc.h"
 #include "clc/math/clc_atan_helpers.h"
 #include "clc/math/clc_copysign.h"
+#include "clc/math/clc_div_fast.h"
 #include "clc/math/clc_fabs.h"
 #include "clc/math/clc_fma.h"
 #include "clc/math/clc_fmax.h"
 #include "clc/math/clc_fmin.h"
 #include "clc/math/clc_ldexp.h"
 #include "clc/math/clc_mad.h"
+#include "clc/math/clc_subnormal_config.h"
 #include "clc/relational/clc_isinf.h"
 #include "clc/relational/clc_isunordered.h"
 #include "clc/relational/clc_select.h"
--- a/libclc/clc/lib/generic/math/clc_atan2.inc
+++ b/libclc/clc/lib/generic/math/clc_atan2.inc
@@ -22,7 +22,13 @@ _CLC_OVERLOAD _CLC_CONST _CLC_DEF __CLC_FLOATN __clc_atan2(__CLC_FLOATN y,
  __CLC_FLOATN v = __clc_fmin(ax, ay);
  __CLC_FLOATN u = __clc_fmax(ax, ay);

-  __CLC_FLOATN vbyu = v / u;
+  __CLC_FLOATN vbyu;
+  if (__clc_denormals_are_zero_fp32()) {
+    __CLC_FLOATN s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
+    vbyu = s * __clc_div_fast(v, s * u);
+  } else {
+    vbyu = v / u;
+  }

  __CLC_FLOATN a = __clc_atan_reduced(vbyu);