[Offload][Conformance] Add randomized tests for single-precision bivariate math functions (#154663)

This patch adds a new set of randomized conformance tests for single-precision bivariate math functions.

The functions included in this set were selected based on the following criteria:
- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 65 of the OpenCL C Specification v3.0.19.
2025-08-21 13:27:25 -03:00 · 2025-08-21 13:27:25 -03:00 · eed5f06ae8
commit eed5f06ae8
parent e20fa4f412
8 changed files with 276 additions and 0 deletions
--- a/offload/unittests/Conformance/device_code/CUDAMath.cpp
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@ -26,6 +26,10 @@ using namespace kernels;
 // Helpers
 //===----------------------------------------------------------------------===//

+// Raises Base to the power of Exponent after passing Exponent through
+// __nv_roundf, so that __nv_powf is only called with rounded exponents.
+static inline float powfRoundedExponent(float Base, float Exponent) {
+  const float RoundedExponent = __nv_roundf(Exponent);
+  return __nv_powf(Base, RoundedExponent);
+}
+
 static inline float sincosfSin(float X) {
  float SinX, CosX;
  __nv_sincosf(X, &SinX, &CosX);
@ -69,6 +73,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out,
  runKernelBody<__nv_atanf>(NumElements, Out, X);
 }

+// Kernel entry point for atan2f: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __nv_atan2f.
+__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_atan2f>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void atanhfKernel(const float *X, float *Out,
                               size_t NumElements) noexcept {
  runKernelBody<__nv_atanhf>(NumElements, Out, X);
@ -119,6 +128,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
  runKernelBody<__nv_expm1f>(NumElements, Out, X);
 }

+// Kernel entry point for hypotf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __nv_hypotf. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_hypotf>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void logKernel(const double *X, double *Out,
                            size_t NumElements) noexcept {
  runKernelBody<__nv_log>(NumElements, Out, X);
@ -144,6 +158,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
  runKernelBody<__nv_log2f>(NumElements, Out, X);
 }

+// Kernel entry point for powf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __nv_powf. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void powfKernel(const float *X, const float *Y, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_powf>(NumElements, Out, X, Y);
+}
+
+// Kernel entry point for the powfRoundedExponent helper: forwards the two
+// input arrays (X, Y), the output array, and the element count to
+// runKernelBody. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void powfRoundedExponentKernel(const float *X, const float *Y,
+                                            float *Out,
+                                            size_t NumElements) noexcept {
+  runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void sinfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<__nv_sinf>(NumElements, Out, X);
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@ -53,6 +53,7 @@ float __nv_acoshf(float);
 float __nv_asinf(float);
 float __nv_asinhf(float);
 float __nv_atanf(float);
+float __nv_atan2f(float, float);
 float __nv_atanhf(float);
 float __nv_cbrtf(float);
 float __nv_cosf(float);
@ -63,11 +64,14 @@ float __nv_expf(float);
 float __nv_exp10f(float);
 float __nv_exp2f(float);
 float __nv_expm1f(float);
+float __nv_hypotf(float, float);
 double __nv_log(double);
 float __nv_logf(float);
 float __nv_log10f(float);
 float __nv_log1pf(float);
 float __nv_log2f(float);
+float __nv_powf(float, float);
+float __nv_roundf(float);
 float __nv_sinf(float);
 void __nv_sincosf(float, float *, float *);
 float __nv_sinhf(float);
@ -87,6 +91,7 @@ float __ocml_acosh_f32(float);
 float __ocml_asin_f32(float);
 float __ocml_asinh_f32(float);
 float __ocml_atan_f32(float);
+float __ocml_atan2_f32(float, float);
 float __ocml_atanh_f32(float);
 float __ocml_cbrt_f32(float);
 float __ocml_cos_f32(float);
@ -97,11 +102,14 @@ float __ocml_exp_f32(float);
 float __ocml_exp10_f32(float);
 float __ocml_exp2_f32(float);
 float __ocml_expm1_f32(float);
+float __ocml_hypot_f32(float, float);
 double __ocml_log_f64(double);
 float __ocml_log_f32(float);
 float __ocml_log10_f32(float);
 float __ocml_log1p_f32(float);
 float __ocml_log2_f32(float);
+float __ocml_pow_f32(float, float);
+float __ocml_round_f32(float);
 float __ocml_sin_f32(float);
 float __ocml_sincos_f32(float, float *);
 float __ocml_sinh_f32(float);
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@ -26,6 +26,10 @@ using namespace kernels;
 // Helpers
 //===----------------------------------------------------------------------===//

+// Raises Base to the power of Exponent after passing Exponent through
+// __ocml_round_f32, so that __ocml_pow_f32 is only called with rounded
+// exponents.
+static inline float powfRoundedExponent(float Base, float Exponent) {
+  const float RoundedExponent = __ocml_round_f32(Exponent);
+  return __ocml_pow_f32(Base, RoundedExponent);
+}
+
 static inline float sincosfSin(float X) {
  float CosX;
  float SinX = __ocml_sincos_f32(X, &CosX);
@ -69,6 +73,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out,
  runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
 }

+// Kernel entry point for atan2f: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __ocml_atan2_f32.
+__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void atanhfKernel(const float *X, float *Out,
                               size_t NumElements) noexcept {
  runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
@ -119,6 +128,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
  runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
 }

+// Kernel entry point for hypotf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __ocml_hypot_f32. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_hypot_f32>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void logKernel(const double *X, double *Out,
                            size_t NumElements) noexcept {
  runKernelBody<__ocml_log_f64>(NumElements, Out, X);
@ -144,6 +158,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
  runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
 }

+// Kernel entry point for powf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// __ocml_pow_f32. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void powfKernel(const float *X, const float *Y, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
+}
+
+// Kernel entry point for the powfRoundedExponent helper: forwards the two
+// input arrays (X, Y), the output array, and the element count to
+// runKernelBody. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void powfRoundedExponentKernel(const float *X, const float *Y,
+                                            float *Out,
+                                            size_t NumElements) noexcept {
+  runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void sinfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@ -25,6 +25,10 @@ using namespace kernels;
 // Helpers
 //===----------------------------------------------------------------------===//

+// Raises Base to the power of Exponent after passing Exponent through roundf,
+// so that powf is only called with rounded exponents.
+static inline float powfRoundedExponent(float Base, float Exponent) {
+  const float RoundedExponent = roundf(Exponent);
+  return powf(Base, RoundedExponent);
+}
+
 static inline float sincosfSin(float X) {
  float SinX, CosX;
  sincosf(X, &SinX, &CosX);
@ -68,6 +72,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out,
  runKernelBody<atanf>(NumElements, Out, X);
 }

+// Kernel entry point for atan2f: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// atan2f.
+__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<atan2f>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void atanhfKernel(const float *X, float *Out,
                               size_t NumElements) noexcept {
  runKernelBody<atanhf>(NumElements, Out, X);
@ -118,6 +127,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
  runKernelBody<expm1f>(NumElements, Out, X);
 }

+// Kernel entry point for hypotf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// hypotf. Both inputs are taken as pointers to const, matching atan2fKernel.
+__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<hypotf>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
                                 size_t NumElements) noexcept {
  runKernelBody<hypotf16>(NumElements, Out, X, Y);
@ -148,6 +162,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
  runKernelBody<log2f>(NumElements, Out, X);
 }

+// Kernel entry point for powf: forwards the two input arrays (X, Y), the
+// output array, and the element count to runKernelBody, instantiated with
+// powf. Both inputs are taken as pointers to const, matching atan2fKernel.
+__gpu_kernel void powfKernel(const float *X, const float *Y, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<powf>(NumElements, Out, X, Y);
+}
+
+// Kernel entry point for the powfRoundedExponent helper: forwards the two
+// input arrays (X, Y), the output array, and the element count to
+// runKernelBody. Both inputs are taken as pointers to const, matching
+// atan2fKernel.
+__gpu_kernel void powfRoundedExponentKernel(const float *X, const float *Y,
+                                            float *Out,
+                                            size_t NumElements) noexcept {
+  runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
+}
+
 __gpu_kernel void sinfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<sinf>(NumElements, Out, X);
--- a/offload/unittests/Conformance/tests/Atan2fTest.cpp
+++ b/offload/unittests/Conformance/tests/Atan2fTest.cpp
@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the atan2f function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomGenerator.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+
+namespace mathtest {
+
+// Specialization of FunctionConfig for atan2f. It provides the function's
+// name, the name of the device kernel that evaluates it ("atan2fKernel"), and
+// the ULP tolerance used for this function.
+template <> struct FunctionConfig<atan2f> {
+  static constexpr llvm::StringRef Name = "atan2f";
+  static constexpr llvm::StringRef KernelName = "atan2fKernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 6;
+};
+} // namespace mathtest
+
+// Entry point of the atan2f conformance test. Parses the command line, builds
+// a seeded random generator over two float inputs, runs the tests for every
+// configuration returned by cl::getTestConfigs(), and reports the outcome
+// through the process exit code.
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the atan2f function");
+
+  using namespace mathtest;
+
+  // Fixed seed and a Size of 2^32 handed to the generator below.
+  uint64_t Seed = 42;
+  uint64_t Size = 1ULL << 32;
+  // One default-constructed range per input.
+  // NOTE(review): presumably a default IndexedRange<float> covers every float
+  // value -- confirm against IndexedRange.hpp.
+  IndexedRange<float> RangeX;
+  IndexedRange<float> RangeY;
+  RandomGenerator<float, float> Generator(SeedTy{Seed}, Size, RangeX, RangeY);
+
+  // These are read after ParseCommandLineOptions has processed the arguments.
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  bool Passed =
+      runTests<atan2f>(Generator, Configs, DeviceBinaryDir, IsVerbose);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
--- a/offload/unittests/Conformance/tests/CMakeLists.txt
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@ -8,6 +8,7 @@ add_conformance_test(acoshf AcoshfTest.cpp)
 add_conformance_test(asinf AsinfTest.cpp)
 add_conformance_test(asinhf AsinhfTest.cpp)
 add_conformance_test(atanf AtanfTest.cpp)
+add_conformance_test(atan2f Atan2fTest.cpp)
 add_conformance_test(atanhf AtanhfTest.cpp)
 add_conformance_test(cbrtf CbrtfTest.cpp)
 add_conformance_test(cosf CosfTest.cpp)
@ -18,12 +19,14 @@ add_conformance_test(expf ExpfTest.cpp)
 add_conformance_test(exp10f Exp10fTest.cpp)
 add_conformance_test(exp2f Exp2fTest.cpp)
 add_conformance_test(expm1f Expm1fTest.cpp)
+add_conformance_test(hypotf HypotfTest.cpp)
 add_conformance_test(hypotf16 Hypotf16Test.cpp)
 add_conformance_test(log LogTest.cpp)
 add_conformance_test(logf LogfTest.cpp)
 add_conformance_test(log10f Log10fTest.cpp)
 add_conformance_test(log1pf Log1pfTest.cpp)
 add_conformance_test(log2f Log2fTest.cpp)
+add_conformance_test(powf PowfTest.cpp)
 add_conformance_test(sinf SinfTest.cpp)
 add_conformance_test(sincosf SincosfTest.cpp)
 add_conformance_test(sinhf SinhfTest.cpp)
--- a/offload/unittests/Conformance/tests/HypotfTest.cpp
+++ b/offload/unittests/Conformance/tests/HypotfTest.cpp
@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the hypotf function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomGenerator.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+
+namespace mathtest {
+
+// Specialization of FunctionConfig for hypotf. It provides the function's
+// name, the name of the device kernel that evaluates it ("hypotfKernel"), and
+// the ULP tolerance used for this function.
+template <> struct FunctionConfig<hypotf> {
+  static constexpr llvm::StringRef Name = "hypotf";
+  static constexpr llvm::StringRef KernelName = "hypotfKernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 4;
+};
+} // namespace mathtest
+
+// Entry point of the hypotf conformance test. Parses the command line, builds
+// a seeded random generator over two float inputs, runs the tests for every
+// configuration returned by cl::getTestConfigs(), and reports the outcome
+// through the process exit code.
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the hypotf function");
+
+  using namespace mathtest;
+
+  // Fixed seed and a Size of 2^32 handed to the generator below.
+  uint64_t Seed = 42;
+  uint64_t Size = 1ULL << 32;
+  // One default-constructed range per input.
+  // NOTE(review): presumably a default IndexedRange<float> covers every float
+  // value -- confirm against IndexedRange.hpp.
+  IndexedRange<float> RangeX;
+  IndexedRange<float> RangeY;
+  RandomGenerator<float, float> Generator(SeedTy{Seed}, Size, RangeX, RangeY);
+
+  // These are read after ParseCommandLineOptions has processed the arguments.
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  bool Passed =
+      runTests<hypotf>(Generator, Configs, DeviceBinaryDir, IsVerbose);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
--- a/offload/unittests/Conformance/tests/PowfTest.cpp
+++ b/offload/unittests/Conformance/tests/PowfTest.cpp
@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the powf function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomGenerator.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+
+// Host-side counterpart of the device helper of the same name: raises Base to
+// the power of Exponent after passing Exponent through roundf.
+static inline float powfRoundedExponent(float Base, float Exponent) {
+  const float RoundedExponent = roundf(Exponent);
+  return powf(Base, RoundedExponent);
+}
+
+namespace mathtest {
+
+// Specialization of FunctionConfig for powf called with unmodified exponents.
+// It provides the name, the name of the device kernel that evaluates it
+// ("powfKernel"), and the ULP tolerance used for this function.
+template <> struct FunctionConfig<powf> {
+  static constexpr llvm::StringRef Name = "powf (real exponents)";
+  static constexpr llvm::StringRef KernelName = "powfKernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 16;
+};
+
+// Specialization of FunctionConfig for powfRoundedExponent, i.e. powf called
+// with exponents that were first passed through roundf. It provides the name,
+// the name of the device kernel ("powfRoundedExponentKernel"), and the ULP
+// tolerance, which is the same value used for plain powf.
+template <> struct FunctionConfig<powfRoundedExponent> {
+  static constexpr llvm::StringRef Name = "powf (integer exponents)";
+  static constexpr llvm::StringRef KernelName = "powfRoundedExponentKernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 16;
+};
+} // namespace mathtest
+
+// Entry point of the powf conformance test. Parses the command line, then
+// runs two test passes -- powf itself and powfRoundedExponent -- each with its
+// own seeded generator. The process succeeds only if both passes succeed.
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the powf function");
+
+  using namespace mathtest;
+
+  // Size of 2^32 handed to both generators below.
+  uint64_t Size = 1ULL << 32;
+  // One default-constructed range per input, shared by both generators.
+  // NOTE(review): presumably a default IndexedRange<float> covers every float
+  // value -- confirm against IndexedRange.hpp.
+  IndexedRange<float> RangeX;
+  IndexedRange<float> RangeY;
+  // The two passes use different seeds (42 and 51).
+  RandomGenerator<float, float> Generator0(SeedTy{42}, Size, RangeX, RangeY);
+  RandomGenerator<float, float> Generator1(SeedTy{51}, Size, RangeX, RangeY);
+
+  // These are read after ParseCommandLineOptions has processed the arguments.
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  // Both passes always run; neither is skipped when the other fails.
+  bool RealExponentsPassed =
+      runTests<powf>(Generator0, Configs, DeviceBinaryDir, IsVerbose);
+  bool IntegerExponentsPassed = runTests<powfRoundedExponent>(
+      Generator1, Configs, DeviceBinaryDir, IsVerbose);
+
+  return (RealExponentsPassed && IntegerExponentsPassed) ? EXIT_SUCCESS
+                                                         : EXIT_FAILURE;
+}