[Offload][Conformance] Add RandomGenerator for large input spaces (#154252)

This patch implements the `RandomGenerator`, a new input generator that
enables conformance testing for functions with large input spaces (e.g.,
double-precision math functions).

**Architectural Refactoring**

To support different generation strategies in a clean and extensible
way, the existing `ExhaustiveGenerator` was refactored into a new class
hierarchy:
* A new abstract base class, `RangeBasedGenerator`, was introduced using
the Curiously Recurring Template Pattern (CRTP). It contains the common
logic for generators that operate on a sequence of ranges.
* `ExhaustiveGenerator` now inherits from this base class, simplifying
its implementation.

**New Components**
* The new `RandomGenerator` class also inherits from
`RangeBasedGenerator`. It implements a strategy that randomly samples a
specified number of points from the total input space.
* Random number generation is handled by a new, self-contained
`RandomState` class (a `xorshift64*` PRNG seeded with `splitmix64`) to
ensure deterministic and reproducible random streams for testing.

**Example Usage**

As a first use case and demonstration of this new capability, this patch
also adds the first double-precision conformance test for the `log`
function. This test uses the new `RandomGenerator` to validate the
implementations from the `llvm-libm`, `cuda-math`, and `hip-math`
providers.
This commit is contained in:
Leandro Lacerda 2025-08-20 15:37:01 -03:00 committed by GitHub
parent 9888f0c3c4
commit 8d7b50e572
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 361 additions and 73 deletions

View File

@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__nv_expm1f>(NumElements, Out, X);
}
// Kernel for the double-precision `log` test of this provider: forwards the
// element count, the output buffer and the input buffer to runKernelBody
// instantiated with __nv_log.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_log>(NumElements, Out, X);
}
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_logf>(NumElements, Out, X);

View File

@ -63,6 +63,7 @@ float __nv_expf(float);
float __nv_exp10f(float);
float __nv_exp2f(float);
float __nv_expm1f(float);
double __nv_log(double);
float __nv_logf(float);
float __nv_log10f(float);
float __nv_log1pf(float);
@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
float __ocml_exp10_f32(float);
float __ocml_exp2_f32(float);
float __ocml_expm1_f32(float);
double __ocml_log_f64(double);
float __ocml_log_f32(float);
float __ocml_log10_f32(float);
float __ocml_log1p_f32(float);

View File

@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}
// Kernel for the double-precision `log` test of this provider: forwards the
// element count, the output buffer and the input buffer to runKernelBody
// instantiated with __ocml_log_f64.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log_f64>(NumElements, Out, X);
}
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log_f32>(NumElements, Out, X);

View File

@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
runKernelBody<hypotf16>(NumElements, Out, X, Y);
}
// Kernel for the double-precision `log` test of this provider: forwards the
// element count, the output buffer and the input buffer to runKernelBody
// instantiated with `log`. The `const double *` / `double *` buffers are
// presumably what select the double overload inside runKernelBody -- confirm
// against its definition.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log>(NumElements, Out, X);
}
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<logf>(NumElements, Out, X);

View File

@ -8,8 +8,8 @@
///
/// \file
/// This file contains the definition of the ExhaustiveGenerator class, a
/// concrete input generator that exhaustively creates inputs from a given
/// sequence of ranges.
/// concrete range-based generator that exhaustively creates inputs from a
/// given sequence of ranges.
///
//===----------------------------------------------------------------------===//
@ -17,89 +17,62 @@
#define MATHTEST_EXHAUSTIVEGENERATOR_HPP
#include "mathtest/IndexedRange.hpp"
#include "mathtest/InputGenerator.hpp"
#include "mathtest/RangeBasedGenerator.hpp"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Parallel.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
namespace mathtest {
template <typename... InTypes>
class [[nodiscard]] ExhaustiveGenerator final
: public InputGenerator<InTypes...> {
static constexpr std::size_t NumInputs = sizeof...(InTypes);
static_assert(NumInputs > 0, "The number of inputs must be at least 1");
: public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {
friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
using IndexArrayType = std::array<uint64_t, Base::NumInputs>;
using Base::RangesTuple;
using Base::Size;
public:
explicit constexpr ExhaustiveGenerator(
const IndexedRange<InTypes> &...Ranges) noexcept
: RangesTuple(Ranges...) {
bool Overflowed = getSizeWithOverflow(Ranges..., Size);
: Base(Ranges...) {
const auto MaybeSize = getInputSpaceSize(Ranges...);
assert(!Overflowed && "The input space size is too large");
assert((Size > 0) && "The input space size must be at least 1");
assert(MaybeSize.has_value() && "The size is too large");
Size = *MaybeSize;
assert((Size > 0) && "The size must be at least 1");
IndexArrayType DimSizes = {};
std::size_t DimIndex = 0;
((DimSizes[DimIndex++] = Ranges.getSize()), ...);
Strides[NumInputs - 1] = 1;
if constexpr (NumInputs > 1)
for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
Strides[Base::NumInputs - 1] = 1;
if constexpr (Base::NumInputs > 1)
for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
--Index)
Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
}
void reset() noexcept override { NextFlatIndex = 0; }
[[nodiscard]] std::size_t
fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
const std::size_t BufferSize = BufferSizes[0];
assert((BufferSize != 0) && "Buffer size cannot be zero");
assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
[&](std::size_t Size) { return Size == BufferSize; }) &&
"All input buffers must have the same size");
if (NextFlatIndex >= Size)
return 0;
const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
const auto CurrentFlatIndex = NextFlatIndex;
NextFlatIndex += BatchSize;
auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
});
return static_cast<std::size_t>(BatchSize);
}
private:
using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
using IndexArrayType = std::array<uint64_t, NumInputs>;
[[nodiscard]] constexpr IndexArrayType
getNDIndex(uint64_t FlatIndex) const noexcept {
IndexArrayType NDIndex;
static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
uint64_t &Size) noexcept {
Size = 1;
bool Overflowed = false;
for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
NDIndex[Index] = FlatIndex / Strides[Index];
FlatIndex -= NDIndex[Index] * Strides[Index];
}
auto Multiplier = [&](const uint64_t RangeSize) {
if (!Overflowed)
Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
};
(Multiplier(Ranges.getSize()), ...);
return Overflowed;
return NDIndex;
}
template <typename BufferPtrsTupleType>
@ -109,31 +82,37 @@ private:
writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
}
constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
IndexArrayType NDIndex;
for (std::size_t Index = 0; Index < NumInputs; ++Index) {
NDIndex[Index] = FlatIndex / Strides[Index];
FlatIndex -= NDIndex[Index] * Strides[Index];
}
return NDIndex;
}
template <std::size_t Index, typename BufferPtrsTupleType>
void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
if constexpr (Index < NumInputs) {
if constexpr (Index < Base::NumInputs) {
const auto &Range = std::get<Index>(RangesTuple);
std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
}
}
uint64_t Size = 1;
RangesTupleType RangesTuple;
[[nodiscard]] static constexpr std::optional<uint64_t>
getInputSpaceSize(const IndexedRange<InTypes> &...Ranges) noexcept {
uint64_t InputSpaceSize = 1;
bool Overflowed = false;
auto Multiplier = [&](const uint64_t RangeSize) {
if (!Overflowed)
Overflowed =
__builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize);
};
(Multiplier(Ranges.getSize()), ...);
if (Overflowed)
return std::nullopt;
return InputSpaceSize;
}
IndexArrayType Strides = {};
uint64_t NextFlatIndex = 0;
};
} // namespace mathtest

View File

@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the RandomGenerator class, a concrete
/// range-based generator that randomly creates inputs from a given sequence of
/// ranges.
///
//===----------------------------------------------------------------------===//
#ifndef MATHTEST_RANDOMGENERATOR_HPP
#define MATHTEST_RANDOMGENERATOR_HPP
#include "mathtest/IndexedRange.hpp"
#include "mathtest/RandomState.hpp"
#include "mathtest/RangeBasedGenerator.hpp"
#include <cstddef>
#include <cstdint>
#include <tuple>
namespace mathtest {
/// Range-based input generator that draws `Size` random points from the given
/// ranges.
///
/// Every sample is produced by its own RandomState, seeded from the base seed
/// XOR-ed with the sample's flat index, so the value written for a sample
/// depends only on the base seed and on the sample's position in the stream.
template <typename... InTypes>
class [[nodiscard]] RandomGenerator final
    : public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {
  // The base class calls the private writeInputs() hook below.
  friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;

  using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
  using Base::RangesTuple;
  using Base::Size;

public:
  /// \param BaseSeed Seed from which every per-sample seed is derived.
  /// \param Size     Number of samples the generator produces.
  /// \param Ranges   One range per input; each sample picks one element of
  ///                 every range.
  explicit constexpr RandomGenerator(
      SeedTy BaseSeed, uint64_t Size,
      const IndexedRange<InTypes> &...Ranges) noexcept
      : Base(Size, Ranges...), BaseSeed(BaseSeed) {}

private:
  /// Hook invoked by the base class for the sample stored at position
  /// \p Offset of the current batch, which starts at \p CurrentFlatIndex.
  template <typename BufferPtrsTupleType>
  void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
                   BufferPtrsTupleType BufferPtrsTuple) const noexcept {
    const uint64_t SampleFlatIndex = CurrentFlatIndex + Offset;
    RandomState RNG(SeedTy{BaseSeed.Value ^ SampleFlatIndex});
    writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
  }

  /// Writes the inputs number \p Index, Index + 1, ... of one sample, drawing
  /// one random index per range from \p RNG in that order.
  template <std::size_t Index, typename BufferPtrsTupleType>
  void writeInputsImpl(RandomState &RNG, uint64_t Offset,
                       BufferPtrsTupleType BufferPtrsTuple) const noexcept {
    if constexpr (Index < Base::NumInputs) {
      const auto &Range = std::get<Index>(RangesTuple);
      auto &Slot = std::get<Index>(BufferPtrsTuple)[Offset];
      Slot = Range[getRandomIndex(RNG, Range.getSize())];
      writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
    }
  }

  /// Draws an index in [0, RangeSize) from \p RNG without modulo bias.
  ///
  /// \returns 0 when \p RangeSize is 0.
  [[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG,
                                               uint64_t RangeSize) noexcept {
    if (RangeSize == 0)
      return 0;

    // In wrapping 64-bit arithmetic this is 2^64 mod RangeSize. Rejecting the
    // draws below it leaves a multiple of RangeSize equally likely values.
    const uint64_t RejectBelow = (0 - RangeSize) % RangeSize;

    uint64_t Candidate = RNG.next();
    while (Candidate < RejectBelow)
      Candidate = RNG.next();
    return Candidate % RangeSize;
  }

  SeedTy BaseSeed;
};
} // namespace mathtest
#endif // MATHTEST_RANDOMGENERATOR_HPP

View File

@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the RandomState class, a fast and
/// lightweight pseudo-random number generator.
///
/// The implementation is based on the xorshift64* generator, seeded using the
/// SplitMix64 generator for robust initialization. For more details on the
/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift
///
//===----------------------------------------------------------------------===//
#ifndef MATHTEST_RANDOMSTATE_HPP
#define MATHTEST_RANDOMSTATE_HPP
#include <cstdint>
/// Strongly typed wrapper around a raw 64-bit seed value.
struct SeedTy {
  uint64_t Value;
};

/// Small deterministic pseudo-random number generator.
///
/// The state is a single 64-bit word advanced with the xorshift64* recurrence.
/// The initial state is obtained by scrambling the seed with SplitMix64, so
/// that nearby seeds produce unrelated streams.
class [[nodiscard]] RandomState {
  uint64_t State;

  /// Scrambles \p X with the SplitMix64 mixing function.
  ///
  /// \returns The mixed value, or a fixed non-zero constant if the mix yields
  /// zero: an all-zero xorshift state would stay zero forever.
  [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept {
    X += 0x9E3779B97F4A7C15ULL;
    X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
    X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
    X = (X ^ (X >> 31));
    return X ? X : 0x9E3779B97F4A7C15ULL;
  }

public:
  /// Creates a generator whose stream is fully determined by \p Seed.
  explicit constexpr RandomState(SeedTy Seed) noexcept
      : State(splitMix64(Seed.Value)) {}

  /// Advances the state and returns the next 64-bit pseudo-random number.
  ///
  /// Marked [[nodiscard]] because a discarded result still consumes a value
  /// from the stream; use the return value or do not call it.
  [[nodiscard]] constexpr uint64_t next() noexcept {
    uint64_t X = State;
    X ^= X >> 12;
    X ^= X << 25;
    X ^= X >> 27;
    State = X;
    // The multiplication scrambles the output only; the stored state is the
    // plain xorshift word.
    return X * 0x2545F4914F6CDD1DULL;
  }
};
#endif // MATHTEST_RANDOMSTATE_HPP

View File

@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the RangeBasedGenerator class, a base
/// class for input generators that operate on a sequence of ranges.
///
//===----------------------------------------------------------------------===//
#ifndef MATHTEST_RANGEBASEDGENERATOR_HPP
#define MATHTEST_RANGEBASEDGENERATOR_HPP
#include "mathtest/IndexedRange.hpp"
#include "mathtest/InputGenerator.hpp"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Parallel.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <tuple>
namespace mathtest {
/// CRTP base class for input generators that draw their inputs from a sequence
/// of ranges, one range per input.
///
/// This class owns the ranges, the total number of samples (`Size`) and the
/// position in the sample stream. `Derived` must provide
/// `writeInputs(FlatIndex, Offset, BufferPtrsTuple)`, which writes the sample
/// for position `Offset` of the batch starting at `FlatIndex`.
template <typename Derived, typename... InTypes>
class [[nodiscard]] RangeBasedGenerator : public InputGenerator<InTypes...> {
public:
  /// Rewinds the generator to the first sample.
  void reset() noexcept override { NextFlatIndex = 0; }

  /// Fills \p Buffers with the next batch of samples.
  ///
  /// All buffers must be non-empty and of equal size. The batch is written in
  /// parallel, one writeInputs() call per sample.
  ///
  /// \returns The number of samples written to each buffer; 0 once all `Size`
  /// samples have been produced.
  [[nodiscard]] std::size_t
  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
    const std::array<std::size_t, NumInputs> Capacities = {Buffers.size()...};
    const std::size_t Capacity = Capacities.front();
    assert((Capacity != 0) && "Buffer size cannot be zero");
    assert(std::all_of(Capacities.begin(), Capacities.end(),
                       [Capacity](std::size_t Other) {
                         return Other == Capacity;
                       }) &&
           "All input buffers must have the same size");

    if (NextFlatIndex >= Size)
      return 0;

    // Claim the next slice of the stream before writing it.
    const uint64_t FirstFlatIndex = NextFlatIndex;
    const uint64_t Remaining = Size - FirstFlatIndex;
    const auto Count = std::min<uint64_t>(Capacity, Remaining);
    NextFlatIndex = FirstFlatIndex + Count;

    auto BufferPtrs = std::make_tuple(Buffers.data()...);
    auto &Self = *static_cast<Derived *>(this);
    llvm::parallelFor(0, Count, [&](std::size_t Offset) {
      Self.writeInputs(FirstFlatIndex, Offset, BufferPtrs);
    });

    return static_cast<std::size_t>(Count);
  }

protected:
  using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;

  static constexpr std::size_t NumInputs = sizeof...(InTypes);
  static_assert(NumInputs > 0, "The number of inputs must be at least 1");

  /// Stores \p Ranges and leaves `Size` at 0; the derived class is expected
  /// to set it.
  explicit constexpr RangeBasedGenerator(
      const IndexedRange<InTypes> &...Ranges) noexcept
      : RangesTuple(Ranges...) {}

  /// Stores \p Ranges together with the number of samples to generate.
  explicit constexpr RangeBasedGenerator(
      uint64_t Size, const IndexedRange<InTypes> &...Ranges) noexcept
      : RangesTuple(Ranges...), Size(Size) {}

  RangesTupleType RangesTuple;
  uint64_t Size = 0;

private:
  // Flat index of the first sample of the next batch.
  uint64_t NextFlatIndex = 0;
};
} // namespace mathtest
#endif // MATHTEST_RANGEBASEDGENERATOR_HPP

View File

@ -19,6 +19,7 @@ add_conformance_test(exp10f Exp10fTest.cpp)
add_conformance_test(exp2f Exp2fTest.cpp)
add_conformance_test(expm1f Expm1fTest.cpp)
add_conformance_test(hypotf16 Hypotf16Test.cpp)
add_conformance_test(log LogTest.cpp)
add_conformance_test(logf LogfTest.cpp)
add_conformance_test(log10f Log10fTest.cpp)
add_conformance_test(log1pf Log1pfTest.cpp)

View File

@ -0,0 +1,66 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the conformance test of the log function.
///
//===----------------------------------------------------------------------===//
#include "mathtest/CommandLineExtras.hpp"
#include "mathtest/IndexedRange.hpp"
#include "mathtest/RandomGenerator.hpp"
#include "mathtest/RandomState.hpp"
#include "mathtest/TestConfig.hpp"
#include "mathtest/TestRunner.hpp"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <math.h>
namespace {

// `log` names an overload set. Initializing a function pointer of the exact
// target type picks the `double (double)` overload.
constexpr double (*logd)(double) = log; // NOLINT(readability-identifier-naming)

} // namespace
namespace mathtest {
// Test parameters for the double-precision `log` function (selected through
// the `logd` function pointer).
template <> struct FunctionConfig<logd> {
// Name of the function under test.
static constexpr llvm::StringRef Name = "log";
// Name of the device kernel that evaluates the function; matches the
// `logKernel` entry point defined in the provider device sources.
static constexpr llvm::StringRef KernelName = "logKernel";
// Accepted error bound, in ULPs.
// Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
// Table 68, Khronos Registry [July 10, 2025].
static constexpr uint64_t UlpTolerance = 3;
};
} // namespace mathtest
int main(int argc, const char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv,
"Conformance test of the log function");
using namespace mathtest;
uint64_t Seed = 42;
uint64_t Size = 1ULL << 32;
IndexedRange<double> Range(/*Begin=*/0.0,
/*End=*/std::numeric_limits<double>::infinity(),
/*Inclusive=*/true);
RandomGenerator<double> Generator(SeedTy{Seed}, Size, Range);
const auto Configs = cl::getTestConfigs();
const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
const bool IsVerbose = cl::IsVerbose;
bool Passed = runTests<logd>(Generator, Configs, DeviceBinaryDir, IsVerbose);
return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
}