[libc] Add Multithreaded GPU Benchmarks (#98964)
This PR runs benchmarks on 32 threads (a single warp on NVPTX) by default and adds the option of single-threaded benchmarks. A benchmark can be marked to run on a single thread with the `SINGLE_THREADED_BENCHMARK()` macro. I chose to use a flag here so that other options could be added in the future.
This commit is contained in:
parent
68cb903594
commit
8badfccefe
@ -10,6 +10,7 @@ function(add_benchmark benchmark_name)
|
||||
"LINK_LIBRARIES" # Multi-value arguments
|
||||
${ARGN}
|
||||
)
|
||||
|
||||
if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
|
||||
message(FATAL_ERROR "target does not support clock")
|
||||
endif()
|
||||
|
@ -114,8 +114,13 @@ void Benchmark::run_benchmarks() {
|
||||
all_results.reset();
|
||||
|
||||
gpu::sync_threads();
|
||||
auto current_result = b->run();
|
||||
all_results.update(current_result);
|
||||
if (!b->flags ||
|
||||
((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
|
||||
((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
|
||||
id < gpu::get_lane_size())) {
|
||||
auto current_result = b->run();
|
||||
all_results.update(current_result);
|
||||
}
|
||||
gpu::sync_threads();
|
||||
|
||||
if (id == 0)
|
||||
|
@ -74,16 +74,19 @@ struct BenchmarkResult {
|
||||
clock_t total_time = 0;
|
||||
};
|
||||
|
||||
// Bit flags attached to a benchmark to restrict which GPU threads run it.
// Each enumerator occupies its own bit so the values can be tested with `&`.
enum BenchmarkFlags {
  SINGLE_THREADED = 1 << 0, // bit 0 (0x1)
  SINGLE_WAVE = 1 << 1,     // bit 1 (0x2)
};
|
||||
|
||||
BenchmarkResult benchmark(const BenchmarkOptions &options,
|
||||
cpp::function<uint64_t(void)> wrapper_func);
|
||||
|
||||
class Benchmark {
|
||||
const cpp::function<uint64_t(void)> func;
|
||||
const cpp::string_view name;
|
||||
const uint8_t flags;
|
||||
|
||||
public:
|
||||
Benchmark(cpp::function<uint64_t(void)> func, char const *name)
|
||||
: func(func), name(name) {
|
||||
Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
|
||||
: func(func), name(name), flags(flags) {
|
||||
add_benchmark(this);
|
||||
}
|
||||
|
||||
@ -104,6 +107,16 @@ private:
|
||||
|
||||
#define BENCHMARK(SuiteName, TestName, Func) \
|
||||
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
|
||||
Func, #SuiteName "." #TestName)
|
||||
Func, #SuiteName "." #TestName, 0)
|
||||
|
||||
// Declares a Benchmark object named SuiteName_TestName_Instance that wraps
// Func, is labelled "SuiteName.TestName", and carries the
// BenchmarkFlags::SINGLE_THREADED flag.
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
|
||||
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
|
||||
Func, #SuiteName "." #TestName, \
|
||||
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
|
||||
|
||||
// Declares a Benchmark object named SuiteName_TestName_Instance that wraps
// Func, is labelled "SuiteName.TestName", and carries the
// BenchmarkFlags::SINGLE_WAVE flag.
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
|
||||
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
|
||||
Func, #SuiteName "." #TestName, \
|
||||
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
|
||||
|
||||
#endif
|
||||
|
@ -8,6 +8,8 @@ add_benchmark(
|
||||
isalnum_benchmark.cpp
|
||||
DEPENDS
|
||||
libc.src.ctype.isalnum
|
||||
LOADER_ARGS
|
||||
--threads 64
|
||||
)
|
||||
|
||||
add_benchmark(
|
||||
|
@ -7,6 +7,10 @@ uint64_t BM_IsAlnum() {
|
||||
return LIBC_NAMESPACE::latency(LIBC_NAMESPACE::isalnum, x);
|
||||
}
|
||||
BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnum, BM_IsAlnum);
|
||||
SINGLE_THREADED_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleThread,
|
||||
BM_IsAlnum);
|
||||
SINGLE_WAVE_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleWave,
|
||||
BM_IsAlnum);
|
||||
|
||||
uint64_t BM_IsAlnumCapital() {
|
||||
char x = 'A';
|
||||
|
Loading…
x
Reference in New Issue
Block a user