[Offload][Conformance] Add RandomGenerator for large input spaces (#154252)

This patch implements the `RandomGenerator`, a new input generator that enables conformance testing for functions with large input spaces (e.g., double-precision math functions).

**Architectural Refactoring**

To support different generation strategies in a clean and extensible way, the existing `ExhaustiveGenerator` was refactored into a new class hierarchy:

* A new abstract base class, `RangeBasedGenerator`, was introduced using the Curiously Recurring Template Pattern (CRTP). It contains the common logic for generators that operate on a sequence of ranges.
* `ExhaustiveGenerator` now inherits from this base class, simplifying its implementation.

**New Components**

* The new `RandomGenerator` class also inherits from `RangeBasedGenerator`. It implements a strategy that randomly samples a specified number of points from the total input space.
* Random number generation is handled by a new, self-contained `RandomState` class (a `xorshift64*` PRNG seeded with `splitmix64`) to ensure deterministic and reproducible random streams for testing.

**Example Usage**

As a first use case and demonstration of this new capability, this patch also adds the first double-precision conformance test for the `log` function. This test uses the new `RandomGenerator` to validate the implementations from the `llvm-libm`, `cuda-math`, and `hip-math` providers.
2025-08-20 15:37:01 -03:00 · 2025-08-20 15:37:01 -03:00 · 8d7b50e572
commit 8d7b50e572
parent 9888f0c3c4
10 changed files with 361 additions and 73 deletions
--- a/offload/unittests/Conformance/device_code/CUDAMath.cpp
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
  runKernelBody<__nv_expm1f>(NumElements, Out, X);
 }

+__gpu_kernel void logKernel(const double *X, double *Out, // Double-precision counterpart of logfKernel: X is the input buffer, Out the output buffer.
+                            size_t NumElements) noexcept {
+  runKernelBody<__nv_log>(NumElements, Out, X); // Hands __nv_log (double -> double) to the shared kernel body; runKernelBody is defined outside this hunk.
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<__nv_logf>(NumElements, Out, X);
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@ -63,6 +63,7 @@ float __nv_expf(float);
 float __nv_exp10f(float);
 float __nv_exp2f(float);
 float __nv_expm1f(float);
+double __nv_log(double);
 float __nv_logf(float);
 float __nv_log10f(float);
 float __nv_log1pf(float);
@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
 float __ocml_exp10_f32(float);
 float __ocml_exp2_f32(float);
 float __ocml_expm1_f32(float);
+double __ocml_log_f64(double);
 float __ocml_log_f32(float);
 float __ocml_log10_f32(float);
 float __ocml_log1p_f32(float);
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
  runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
 }

+__gpu_kernel void logKernel(const double *X, double *Out, // Double-precision counterpart of logfKernel: X is the input buffer, Out the output buffer.
+                            size_t NumElements) noexcept {
+  runKernelBody<__ocml_log_f64>(NumElements, Out, X); // Hands __ocml_log_f64 (double -> double) to the shared kernel body; runKernelBody is defined outside this hunk.
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<__ocml_log_f32>(NumElements, Out, X);
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
  runKernelBody<hypotf16>(NumElements, Out, X, Y);
 }

+__gpu_kernel void logKernel(const double *X, double *Out, // Double-precision counterpart of logfKernel: X is the input buffer, Out the output buffer.
+                            size_t NumElements) noexcept {
+  runKernelBody<log>(NumElements, Out, X); // Hands log to the shared kernel body; NOTE(review): presumably resolves to the double version given the double buffers — confirm.
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<logf>(NumElements, Out, X);
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@ -8,8 +8,8 @@
 ///
 /// \file
 /// This file contains the definition of the ExhaustiveGenerator class, a
-/// concrete input generator that exhaustively creates inputs from a given
-/// sequence of ranges.
+/// concrete range-based generator that exhaustively creates inputs from a
+/// given sequence of ranges.
 ///
 //===----------------------------------------------------------------------===//

@ -17,89 +17,62 @@
 #define MATHTEST_EXHAUSTIVEGENERATOR_HPP

 #include "mathtest/IndexedRange.hpp"
-#include "mathtest/InputGenerator.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"

-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Parallel.h"
-
-#include <algorithm>
 #include <array>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <optional>
 #include <tuple>

 namespace mathtest {

 template <typename... InTypes>
 class [[nodiscard]] ExhaustiveGenerator final
-    : public InputGenerator<InTypes...> {
-  static constexpr std::size_t NumInputs = sizeof...(InTypes);
-  static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+    : public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {
+
+  friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+
+  using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+  using IndexArrayType = std::array<uint64_t, Base::NumInputs>;
+
+  using Base::RangesTuple;
+  using Base::Size;

 public:
  explicit constexpr ExhaustiveGenerator(
      const IndexedRange<InTypes> &...Ranges) noexcept
-      : RangesTuple(Ranges...) {
-    bool Overflowed = getSizeWithOverflow(Ranges..., Size);
+      : Base(Ranges...) {
+    const auto MaybeSize = getInputSpaceSize(Ranges...);

-    assert(!Overflowed && "The input space size is too large");
-    assert((Size > 0) && "The input space size must be at least 1");
+    assert(MaybeSize.has_value() && "The size is too large");
+    Size = *MaybeSize;
+
+    assert((Size > 0) && "The size must be at least 1");

    IndexArrayType DimSizes = {};
    std::size_t DimIndex = 0;
    ((DimSizes[DimIndex++] = Ranges.getSize()), ...);

-    Strides[NumInputs - 1] = 1;
-    if constexpr (NumInputs > 1)
-      for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
+    Strides[Base::NumInputs - 1] = 1;
+    if constexpr (Base::NumInputs > 1)
+      for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
+           --Index)
        Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
  }

-  void reset() noexcept override { NextFlatIndex = 0; }
-
-  [[nodiscard]] std::size_t
-  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
-    const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
-    const std::size_t BufferSize = BufferSizes[0];
-    assert((BufferSize != 0) && "Buffer size cannot be zero");
-    assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
-                       [&](std::size_t Size) { return Size == BufferSize; }) &&
-           "All input buffers must have the same size");
-
-    if (NextFlatIndex >= Size)
-      return 0;
-
-    const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
-    const auto CurrentFlatIndex = NextFlatIndex;
-    NextFlatIndex += BatchSize;
-
-    auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
-
-    llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
-      writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
-    });
-
-    return static_cast<std::size_t>(BatchSize);
-  }
-
 private:
-  using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
-  using IndexArrayType = std::array<uint64_t, NumInputs>;
+  [[nodiscard]] constexpr IndexArrayType
+  getNDIndex(uint64_t FlatIndex) const noexcept {
+    IndexArrayType NDIndex;

-  static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
-                                  uint64_t &Size) noexcept {
-    Size = 1;
-    bool Overflowed = false;
+    for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
+      NDIndex[Index] = FlatIndex / Strides[Index];
+      FlatIndex -= NDIndex[Index] * Strides[Index];
+    }

-    auto Multiplier = [&](const uint64_t RangeSize) {
-      if (!Overflowed)
-        Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
-    };
-
-    (Multiplier(Ranges.getSize()), ...);
-
-    return Overflowed;
+    return NDIndex;
  }

  template <typename BufferPtrsTupleType>
@ -109,31 +82,37 @@ private:
    writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
  }

-  constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
-    IndexArrayType NDIndex;
-
-    for (std::size_t Index = 0; Index < NumInputs; ++Index) {
-      NDIndex[Index] = FlatIndex / Strides[Index];
-      FlatIndex -= NDIndex[Index] * Strides[Index];
-    }
-
-    return NDIndex;
-  }
-
  template <std::size_t Index, typename BufferPtrsTupleType>
  void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
                       BufferPtrsTupleType BufferPtrsTuple) const noexcept {
-    if constexpr (Index < NumInputs) {
+    if constexpr (Index < Base::NumInputs) {
      const auto &Range = std::get<Index>(RangesTuple);
      std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
+
      writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
    }
  }

-  uint64_t Size = 1;
-  RangesTupleType RangesTuple;
+  [[nodiscard]] static constexpr std::optional<uint64_t>
+  getInputSpaceSize(const IndexedRange<InTypes> &...Ranges) noexcept {
+    uint64_t InputSpaceSize = 1;
+    bool Overflowed = false;
+
+    auto Multiplier = [&](const uint64_t RangeSize) {
+      if (!Overflowed)
+        Overflowed =
+            __builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize);
+    };
+
+    (Multiplier(Ranges.getSize()), ...);
+
+    if (Overflowed)
+      return std::nullopt;
+
+    return InputSpaceSize;
+  }
+
  IndexArrayType Strides = {};
-  uint64_t NextFlatIndex = 0;
 };
 } // namespace mathtest

--- a/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomGenerator class, a concrete
+/// range-based generator that randomly creates inputs from a given sequence of
+/// ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMGENERATOR_HPP
+#define MATHTEST_RANDOMGENERATOR_HPP
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename... InTypes>
+class [[nodiscard]] RandomGenerator final
+    : public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {
+  // Generator that, for each of `Size` flat indices, draws one random element from every range; the CRTP base calls back into writeInputs.
+  friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>; // Lets the base class reach the private writeInputs hook.
+
+  using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
+
+  using Base::RangesTuple;
+  using Base::Size;
+
+public:
+  explicit constexpr RandomGenerator( // BaseSeed: seed mixed into every per-element stream; Size: number of input tuples to produce; Ranges: one range per input.
+      SeedTy BaseSeed, uint64_t Size,
+      const IndexedRange<InTypes> &...Ranges) noexcept
+      : Base(Size, Ranges...), BaseSeed(BaseSeed) {}
+
+private:
+  [[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG, // Returns an index in [0, RangeSize) drawn from RNG by rejection sampling; returns 0 when RangeSize is 0.
+                                               uint64_t RangeSize) noexcept {
+    if (RangeSize == 0)
+      return 0;
+
+    const uint64_t Threshold = (-RangeSize) % RangeSize; // Unsigned negation wraps, so this equals 2^64 mod RangeSize.
+
+    uint64_t RandomNumber;
+    do {
+      RandomNumber = RNG.next();
+    } while (RandomNumber < Threshold); // Discard the lowest Threshold values so the accepted count is a multiple of RangeSize (no modulo bias).
+
+    return RandomNumber % RangeSize;
+  }
+  // Hook called by the base class for one element: fills slot Offset of every buffer with a random element of the matching range.
+  template <typename BufferPtrsTupleType>
+  void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
+                   BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+
+    RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)}); // Fresh stream per element, keyed by BaseSeed and the element's flat index only, so results do not depend on how batches are split.
+    writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
+  }
+  // Compile-time recursion over input positions; the same RNG is passed along, so each input consumes further draws from one stream.
+  template <std::size_t Index, typename BufferPtrsTupleType>
+  void writeInputsImpl(RandomState &RNG, uint64_t Offset,
+                       BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+    if constexpr (Index < Base::NumInputs) {
+      const auto &Range = std::get<Index>(RangesTuple);
+      const auto RandomIndex = getRandomIndex(RNG, Range.getSize());
+      std::get<Index>(BufferPtrsTuple)[Offset] = Range[RandomIndex]; // Store the chosen element of range Index at slot Offset of buffer Index.
+
+      writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
+    }
+  }
+
+  SeedTy BaseSeed; // Seed supplied at construction; combined with the flat index in writeInputs.
+};
+} // namespace mathtest
+
+#endif // MATHTEST_RANDOMGENERATOR_HPP
--- a/offload/unittests/Conformance/include/mathtest/RandomState.hpp
+++ b/offload/unittests/Conformance/include/mathtest/RandomState.hpp
@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomState class, a fast and
+/// lightweight pseudo-random number generator.
+///
+/// The implementation is based on the xorshift* generator, seeded using the
+/// SplitMix64 generator for robust initialization. For more details on the
+/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMSTATE_HPP
+#define MATHTEST_RANDOMSTATE_HPP
+
+#include <cstdint>
+
+struct SeedTy { // Wrapper around a raw 64-bit seed, presumably so a seed cannot be confused with other uint64_t arguments. NOTE(review): declared at global scope in this header rather than inside namespace mathtest — confirm this is intended.
+  uint64_t Value; // The raw seed value.
+};
+
+class [[nodiscard]] RandomState { // Small xorshift*-style pseudo-random generator with 64 bits of state, advanced by next().
+  uint64_t State; // Current generator state; never zero after construction (see splitMix64).
+  // SplitMix64-style mixing step that turns a seed into the initial state; a zero result is replaced by a nonzero constant.
+  [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept {
+    X += 0x9E3779B97F4A7C15ULL;
+    X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
+    X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
+    X = (X ^ (X >> 31));
+    return X ? X : 0x9E3779B97F4A7C15ULL; // The shift/xor steps in next() map 0 to 0, so a zero state would get stuck.
+  }
+
+public:
+  explicit constexpr RandomState(SeedTy Seed) noexcept // Initializes the state by passing Seed.Value through splitMix64.
+      : State(splitMix64(Seed.Value)) {}
+  // Advances the state with three shift/xor steps and returns the new state multiplied by a fixed odd constant (wrapping modulo 2^64).
+  inline uint64_t next() noexcept {
+    uint64_t X = State;
+    X ^= X >> 12;
+    X ^= X << 25;
+    X ^= X >> 27;
+    State = X; // The stored state is the unmultiplied value; only the returned output is scrambled.
+    return X * 0x2545F4914F6CDD1DULL;
+  }
+};
+
+#endif // MATHTEST_RANDOMSTATE_HPP
--- a/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp
@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RangeBasedGenerator class, a base
+/// class for input generators that operate on a sequence of ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANGEBASEDGENERATOR_HPP
+#define MATHTEST_RANGEBASEDGENERATOR_HPP
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/InputGenerator.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Parallel.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename Derived, typename... InTypes>
+class [[nodiscard]] RangeBasedGenerator : public InputGenerator<InTypes...> { // CRTP base: holds the ranges, the total size and the cursor; Derived supplies writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple).
+public:
+  void reset() noexcept override { NextFlatIndex = 0; } // Rewinds so the next fill() starts again from flat index 0.
+  // Writes the next batch of inputs to the front of every buffer and returns the batch size; returns 0 once Size inputs have been handed out.
+  [[nodiscard]] std::size_t
+  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
+    const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
+    const std::size_t BufferSize = BufferSizes[0];
+    assert((BufferSize != 0) && "Buffer size cannot be zero");
+    assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
+                       [&](std::size_t Size) { return Size == BufferSize; }) &&
+           "All input buffers must have the same size");
+
+    if (NextFlatIndex >= Size)
+      return 0;
+
+    const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex); // The final batch may be shorter than the buffers.
+    const auto CurrentFlatIndex = NextFlatIndex;
+    NextFlatIndex += BatchSize; // Cursor is advanced before the loop; the loop body only uses the saved CurrentFlatIndex.
+
+    auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
+
+    llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) { // One callback per Offset in [0, BatchSize); the derived class is expected to write slot Offset of each buffer.
+      static_cast<Derived *>(this)->writeInputs(CurrentFlatIndex, Offset,
+                                                BufferPtrsTuple);
+    });
+
+    return static_cast<std::size_t>(BatchSize);
+  }
+
+protected:
+  using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
+
+  static constexpr std::size_t NumInputs = sizeof...(InTypes);
+  static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+  // Stores the ranges only; Size keeps its default of 0 until the derived class assigns it.
+  explicit constexpr RangeBasedGenerator(
+      const IndexedRange<InTypes> &...Ranges) noexcept
+      : RangesTuple(Ranges...) {}
+  // Stores the ranges together with an explicit number of inputs to generate.
+  explicit constexpr RangeBasedGenerator(
+      uint64_t Size, const IndexedRange<InTypes> &...Ranges) noexcept
+      : RangesTuple(Ranges...), Size(Size) {}
+
+  RangesTupleType RangesTuple; // One IndexedRange per input type.
+  uint64_t Size = 0; // Total number of input tuples fill() hands out before returning 0.
+
+private:
+  uint64_t NextFlatIndex = 0; // Flat index of the first input of the next batch.
+};
+} // namespace mathtest
+
+#endif // MATHTEST_RANGEBASEDGENERATOR_HPP
--- a/offload/unittests/Conformance/tests/CMakeLists.txt
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@ -19,6 +19,7 @@ add_conformance_test(exp10f Exp10fTest.cpp)
 add_conformance_test(exp2f Exp2fTest.cpp)
 add_conformance_test(expm1f Expm1fTest.cpp)
 add_conformance_test(hypotf16 Hypotf16Test.cpp)
+add_conformance_test(log LogTest.cpp)
 add_conformance_test(logf LogfTest.cpp)
 add_conformance_test(log10f Log10fTest.cpp)
 add_conformance_test(log1pf Log1pfTest.cpp)
--- a/offload/unittests/Conformance/tests/LogTest.cpp
+++ b/offload/unittests/Conformance/tests/LogTest.cpp
@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the log function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomGenerator.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <limits>
+#include <math.h>
+
+namespace {
+
+// Disambiguate the overloaded 'log' function to select the double version
+constexpr auto logd // NOLINT(readability-identifier-naming)
+    = static_cast<double (*)(double)>(log); // The resulting function pointer is the template argument for FunctionConfig and runTests in this file.
+} // namespace
+
+namespace mathtest {
+
+template <> struct FunctionConfig<logd> { // Test metadata for the double-precision log function.
+  static constexpr llvm::StringRef Name = "log";
+  static constexpr llvm::StringRef KernelName = "logKernel"; // Presumably the name of the device kernel the runner launches — confirm against the device code.
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 68, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 3;
+};
+} // namespace mathtest
+
+int main(int argc, const char **argv) { // Runs the log conformance test on randomly sampled inputs; exits with EXIT_SUCCESS only if runTests reports success.
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the log function");
+
+  using namespace mathtest;
+
+  uint64_t Seed = 42; // Fixed seed, so every run draws the same inputs. NOTE(review): uint64_t is used without a direct <cstdint> include in this file.
+  uint64_t Size = 1ULL << 32; // Number of random inputs to generate (2^32).
+  IndexedRange<double> Range(/*Begin=*/0.0, // Sample from 0.0 up to and including +infinity.
+                             /*End=*/std::numeric_limits<double>::infinity(),
+                             /*Inclusive=*/true);
+  RandomGenerator<double> Generator(SeedTy{Seed}, Size, Range);
+
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  bool Passed = runTests<logd>(Generator, Configs, DeviceBinaryDir, IsVerbose);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}