
This patch implements the `RandomGenerator`, a new input generator that enables conformance testing for functions with large input spaces (e.g., double-precision math functions).

**Architectural Refactoring**

To support different generation strategies in a clean and extensible way, the existing `ExhaustiveGenerator` was refactored into a new class hierarchy:

* A new abstract base class, `RangeBasedGenerator`, was introduced using the Curiously Recurring Template Pattern (CRTP). It contains the common logic for generators that operate on a sequence of ranges.
* `ExhaustiveGenerator` now inherits from this base class, simplifying its implementation.

**New Components**

* The new `RandomGenerator` class also inherits from `RangeBasedGenerator`. It implements a strategy that randomly samples a specified number of points from the total input space.
* Random number generation is handled by a new, self-contained `RandomState` class (a `xorshift64*` PRNG seeded with `splitmix64`) to ensure deterministic and reproducible random streams for testing.

**Example Usage**

As a first use case and demonstration of this new capability, this patch also adds the first double-precision conformance test for the `log` function. This test uses the new `RandomGenerator` to validate the implementations from the `llvm-libm`, `cuda-math`, and `hip-math` providers.
184 lines · 5.9 KiB · C++
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the device kernels that wrap the
/// math functions from the cuda-math provider.
///
//===----------------------------------------------------------------------===//

|
#ifdef CUDA_MATH_FOUND
|
|
|
|
#include "Conformance/device_code/DeviceAPIs.hpp"
|
|
#include "Conformance/device_code/KernelRunner.hpp"
|
|
|
|
#include <gpuintrin.h>
|
|
#include <stddef.h>
|
|
|
|
using namespace kernels;
|
|
|
|
//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//
|
|
|
|
/// Adapts __nv_sincosf to a single-result function returning only sin(X),
/// so it can be plugged into runKernelBody like the other unary wrappers.
/// The cosine output is computed but intentionally discarded.
static inline float sincosfSin(float X) {
  float Sin;
  float Cos;
  __nv_sincosf(X, &Sin, &Cos);
  return Sin;
}
|
|
|
|
/// Adapts __nv_sincosf to a single-result function returning only cos(X),
/// the counterpart of sincosfSin above. The sine output is intentionally
/// discarded.
static inline float sincosfCos(float X) {
  float Sin;
  float Cos;
  __nv_sincosf(X, &Sin, &Cos);
  return Cos;
}
|
|
|
|
//===----------------------------------------------------------------------===//
// Kernels
//===----------------------------------------------------------------------===//
|
|
|
|
extern "C" {
|
|
|
|
__gpu_kernel void acosfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_acosf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void acoshfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_acoshf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void asinfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_asinf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void asinhfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_asinhf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void atanfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_atanf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void atanhfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_atanhf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_cbrtf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void cosfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_cosf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void coshfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_coshf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void cospifKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_cospif>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void erffKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_erff>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void expfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_expf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void exp10fKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_exp10f>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void exp2fKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_exp2f>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void expm1fKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_expm1f>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void logKernel(const double *X, double *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_log>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void logfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_logf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void log10fKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_log10f>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void log1pfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_log1pf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void log2fKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_log2f>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void sinfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_sinf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<sincosfSin>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<sincosfCos>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void sinhfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_sinhf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void sinpifKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_sinpif>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void tanfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_tanf>(NumElements, Out, X);
|
|
}
|
|
|
|
__gpu_kernel void tanhfKernel(const float *X, float *Out,
|
|
size_t NumElements) noexcept {
|
|
runKernelBody<__nv_tanhf>(NumElements, Out, X);
|
|
}
|
|
} // extern "C"
|
|
|
|
#endif // CUDA_MATH_FOUND
|