
This sprinkles a few mutexes around the plugin interface so that the olLaunchKernel CTS test now passes when run on multiple threads. Part of this also involved changing the interface for device synchronise so that it can optionally not free the underlying queue (freeing the queue during synchronisation introduced a race condition in liboffload).
287 lines
8.6 KiB
C++
287 lines
8.6 KiB
C++
//===------- Offload API tests - olLaunchKernel --------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "../common/Fixtures.hpp"
|
|
#include <OffloadAPI.h>
|
|
#include <gtest/gtest.h>
|
|
|
|
struct LaunchKernelTestBase : OffloadQueueTest {
|
|
void SetUpProgram(const char *program) {
|
|
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
|
|
ASSERT_TRUE(TestEnvironment::loadDeviceBinary(program, Device, DeviceBin));
|
|
ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
|
|
ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
|
|
DeviceBin->getBufferSize(), &Program));
|
|
|
|
LaunchArgs.Dimensions = 1;
|
|
LaunchArgs.GroupSize = {64, 1, 1};
|
|
LaunchArgs.NumGroups = {1, 1, 1};
|
|
|
|
LaunchArgs.DynSharedMemory = 0;
|
|
}
|
|
|
|
void TearDown() override {
|
|
if (Program) {
|
|
olDestroyProgram(Program);
|
|
}
|
|
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
|
|
}
|
|
|
|
std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
|
|
ol_program_handle_t Program = nullptr;
|
|
ol_kernel_launch_size_args_t LaunchArgs{};
|
|
};
|
|
|
|
struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
|
|
void SetUpKernel(const char *kernel) {
|
|
RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel));
|
|
ASSERT_SUCCESS(
|
|
olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel));
|
|
}
|
|
|
|
ol_symbol_handle_t Kernel = nullptr;
|
|
};
|
|
|
|
#define KERNEL_TEST(NAME, KERNEL) \
|
|
struct olLaunchKernel##NAME##Test : LaunchSingleKernelTestBase { \
|
|
void SetUp() override { SetUpKernel(#KERNEL); } \
|
|
}; \
|
|
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
|
|
|
|
KERNEL_TEST(Foo, foo)
|
|
KERNEL_TEST(NoArgs, noargs)
|
|
KERNEL_TEST(LocalMem, localmem)
|
|
KERNEL_TEST(LocalMemReduction, localmem_reduction)
|
|
KERNEL_TEST(LocalMemStatic, localmem_static)
|
|
KERNEL_TEST(GlobalCtor, global_ctor)
|
|
KERNEL_TEST(GlobalDtor, global_dtor)
|
|
|
|
struct LaunchMultipleKernelTestBase : LaunchKernelTestBase {
|
|
void SetUpKernels(const char *program, std::vector<const char *> kernels) {
|
|
RETURN_ON_FATAL_FAILURE(SetUpProgram(program));
|
|
|
|
Kernels.resize(kernels.size());
|
|
size_t I = 0;
|
|
for (auto K : kernels)
|
|
ASSERT_SUCCESS(
|
|
olGetSymbol(Program, K, OL_SYMBOL_KIND_KERNEL, &Kernels[I++]));
|
|
}
|
|
|
|
std::vector<ol_symbol_handle_t> Kernels;
|
|
};
|
|
|
|
#define KERNEL_MULTI_TEST(NAME, PROGRAM, ...) \
|
|
struct olLaunchKernel##NAME##Test : LaunchMultipleKernelTestBase { \
|
|
void SetUp() override { SetUpKernels(#PROGRAM, {__VA_ARGS__}); } \
|
|
}; \
|
|
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
|
|
|
|
KERNEL_MULTI_TEST(Global, global, "write", "read")
|
|
|
|
/// Launches the `foo` kernel on the fixture queue, waits for the queue and
/// checks that element i of the managed buffer equals i.
TEST_P(olLaunchKernelFooTest, Success) {
  void *Mem = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  // Kernel argument block: a single pointer to the output buffer.
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  // The buffer holds GroupSize.x elements; bound the check by the same value
  // rather than a literal so it cannot drift from the allocation size.
  uint32_t *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Runs the body of the plain `foo` success test through threadify(). Each
/// invocation allocates its own buffer but shares the fixture's Queue, Device,
/// Kernel and LaunchArgs.
TEST_P(olLaunchKernelFooTest, SuccessThreaded) {
  threadify([&](size_t) {
    void *Mem = nullptr;
    ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                              LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
    // Kernel argument block: a single pointer to this invocation's buffer.
    struct {
      void *Mem;
    } Args{Mem};

    ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
                                  &LaunchArgs));

    ASSERT_SUCCESS(olSyncQueue(Queue));

    // The buffer holds GroupSize.x elements; bound the check by the same
    // value rather than a literal so it cannot drift from the allocation.
    uint32_t *Data = static_cast<uint32_t *>(Mem);
    for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
      ASSERT_EQ(Data[i], i);
    }

    ASSERT_SUCCESS(olMemFree(Mem));
  });
}
|
|
|
|
/// Launches the `noargs` kernel with a null argument pointer and a zero
/// argument size, then waits for the queue. Only the API results are checked.
TEST_P(olLaunchKernelNoArgsTest, Success) {
  // No argument block: null pointer, zero size.
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
}
|
|
|
|
/// Launches the `foo` kernel with a null queue and reads the results straight
/// away, without any olSyncQueue call.
/// NOTE(review): presumably a null queue makes olLaunchKernel block until the
/// kernel has finished, as the test name suggests - confirm against the API.
TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
  void *Mem = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));

  // Kernel argument block: a single pointer to the output buffer.
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
                                &LaunchArgs));

  // The buffer holds GroupSize.x elements; bound the check by the same value
  // rather than a literal so it cannot drift from the allocation size.
  uint32_t *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Launches `localmem` over four groups with 64 * sizeof(uint32_t) bytes of
/// dynamic shared memory and checks that element i equals (i % 64) * 2.
TEST_P(olLaunchKernelLocalMemTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  // One uint32_t per work item across all groups.
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x *
                                sizeof(uint32_t),
                            &Mem));

  // Kernel argument block: a single pointer to the output buffer.
  struct LocalMemArgs {
    void *Mem;
  };
  LocalMemArgs Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  const auto ElementCount = LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x;
  auto *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < ElementCount; ++i) {
    ASSERT_EQ(Data[i], (i % 64) * 2);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Launches `localmem_reduction` over four groups with 64 * sizeof(uint32_t)
/// bytes of dynamic shared memory and checks that every per-group result
/// equals 2 * GroupSize.x.
TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  // One uint32_t result slot per group.
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));

  // Kernel argument block: a single pointer to the output buffer.
  struct ReductionArgs {
    void *Mem;
  };
  ReductionArgs Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  auto *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; ++i) {
    ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Launches `localmem_static` over four groups with no dynamic shared memory
/// requested and checks that every per-group result equals 2 * GroupSize.x.
TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 0;

  // One uint32_t result slot per group.
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));

  // Kernel argument block: a single pointer to the output buffer.
  struct StaticMemArgs {
    void *Mem;
  };
  StaticMemArgs Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  auto *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; ++i) {
    ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Launches the "write" kernel (Kernels[0]) with no arguments, then the
/// "read" kernel (Kernels[1]) with the output buffer, syncing the queue after
/// each launch, and checks that element i of the buffer equals i * 2.
TEST_P(olLaunchKernelGlobalTest, Success) {
  void *Mem = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  // Argument block for the "read" kernel only.
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args),
                                &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  // The buffer holds GroupSize.x elements; bound the check by the same value
  // rather than a literal so it cannot drift from the allocation size.
  uint32_t *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i * 2);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Looks up the symbol "global" as a global variable and checks that passing
/// it to olLaunchKernel is rejected with OL_ERRC_SYMBOL_KIND.
TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
  ol_symbol_handle_t Global = nullptr;
  ASSERT_SUCCESS(
      olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global));

  // A global-variable symbol is handed in where a kernel symbol is expected.
  ASSERT_ERROR(OL_ERRC_SYMBOL_KIND,
               olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs));
}
|
|
|
|
/// Launches the `global_ctor` kernel, waits for the queue and checks that
/// element i of the managed buffer equals i + 100.
TEST_P(olLaunchKernelGlobalCtorTest, Success) {
  void *Mem = nullptr;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  // Kernel argument block: a single pointer to the output buffer.
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  // The buffer holds GroupSize.x elements; bound the check by the same value
  // rather than a literal so it cannot drift from the allocation size.
  uint32_t *Data = static_cast<uint32_t *>(Mem);
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i + 100);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}
|
|
|
|
/// Launches the `global_dtor` kernel with no arguments and waits for the
/// queue; only the API results are checked.
TEST_P(olLaunchKernelGlobalDtorTest, Success) {
  // TODO: There is currently no way to inspect the result of a destructor.
  // Once one is found or implemented, update this test. Until then it only
  // checks that nothing crashes.
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
}
|