diff --git a/libsycl/docs/index.rst b/libsycl/docs/index.rst index 87ffb8481c86..0ec3a4d82507 100644 --- a/libsycl/docs/index.rst +++ b/libsycl/docs/index.rst @@ -108,3 +108,8 @@ TODO for added SYCL classes * ``context``: to implement get_info, properties & public constructors once context support is added to liboffload * ``queue``: to implement USM methods, to implement synchronization methods, to implement submit & copy with accessors (low priority), get_info & properties, ctors that accepts context (blocked by lack of liboffload support) * ``property_list``: to fully implement and integrate with existing SYCL runtime classes supporting it +* usm allocations: + + * add aligned functions (blocked by liboffload support) + * forward templated funcs to alignment methods (rewrite current impl) + * handle sub devices once they are implemented (blocked by liboffload support) diff --git a/libsycl/include/sycl/__impl/usm_alloc_type.hpp b/libsycl/include/sycl/__impl/usm_alloc_type.hpp new file mode 100644 index 000000000000..5455202754d0 --- /dev/null +++ b/libsycl/include/sycl/__impl/usm_alloc_type.hpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_USM_ALLOC_TYPE_HPP +#define _LIBSYCL___IMPL_USM_ALLOC_TYPE_HPP + +#include + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace usm { + +// SYCL 2020 4.8.2. Kinds of unified shared memory. +enum class alloc : char { host = 0, device = 1, shared = 2, unknown = 3 }; + +} // namespace usm + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_USM_ALLOC_TYPE_HPP diff --git a/libsycl/include/sycl/__impl/usm_functions.hpp b/libsycl/include/sycl/__impl/usm_functions.hpp new file mode 100644 index 000000000000..63f6347855b6 --- /dev/null +++ b/libsycl/include/sycl/__impl/usm_functions.hpp @@ -0,0 +1,302 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_USM_FUNCTIONS_HPP +#define _LIBSYCL___IMPL_USM_FUNCTIONS_HPP + +#include +#include +#include + +#include + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +/// \name SYCL 2020 4.8.3.2. Device allocation functions. +/// \brief Allocations in device memory are not accessible by the host. +/// @{ +/// Allocates device USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclDevice the device to use for the allocation. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which is allocated on +/// syclDevice and which must eventually be deallocated with sycl::free in order +/// to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_device(std::size_t numBytes, + const device &syclDevice, + const context &syclContext, + const property_list &propList = {}); + +/// Allocates device USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclDevice the device to use for the allocation. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which is allocated on +/// syclDevice and which must eventually be deallocated with sycl::free in order +/// to avoid a memory leak. +template +T *malloc_device(std::size_t count, const device &syclDevice, + const context &syclContext, + const property_list &propList = {}) { + // TODO: to rewrite with aligned_malloc_device once it's supported in + // liboffload. + return static_cast( + malloc_device(count * sizeof(T), syclDevice, syclContext, propList)); +} + +/// Allocates device USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which is allocated on +/// syclDevice and which must eventually be deallocated with sycl::free in order +/// to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_device(std::size_t numBytes, + const queue &syclQueue, + const property_list &propList = {}); + +/// Allocates device USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which is allocated on +/// syclDevice and which must eventually be deallocated with sycl::free in order +/// to avoid a memory leak. +template +T *malloc_device(std::size_t count, const queue &syclQueue, + const property_list &propList = {}) { + return malloc_device(count, syclQueue.get_device(), + syclQueue.get_context(), propList); +} +/// @} + +/// \name SYCL 2020 4.8.3.3. Host allocation functions. +/// \brief Allocations in host memory are accessible by a device. +/// @{ +/// Allocates host USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclContext the context that should have access to the allocated +/// memory. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_host(std::size_t numBytes, + const context &syclContext, + const property_list &propList = {}); + +/// Allocates host USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclContext the context that should have access to the allocated +/// memory. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +template +T *malloc_host(std::size_t count, const context &syclContext, + const property_list &propList = {}) { + // TODO: to rewrite with aligned_malloc_host once it's supported in + // liboffload. + return static_cast( + malloc_host(count * sizeof(T), syclContext, propList)); +} + +/// Allocates host USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclQueue queue that provides the context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_host(std::size_t numBytes, const queue &syclQueue, + const property_list &propList = {}); + +/// Allocates host USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclQueue queue that provides the context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +template +T *malloc_host(std::size_t count, const queue &syclQueue, + const property_list &propList = {}) { + return malloc_host(count, syclQueue.get_context(), propList); +} +/// @} + +/// \name SYCL 2020 4.8.3.4. Shared allocation functions. +/// \brief Allocations in shared memory are accessible by both host and device. +/// @{ +/// Allocates shared USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclDevice the device to use for the allocation. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_shared(std::size_t numBytes, + const device &syclDevice, + const context &syclContext, + const property_list &propList = {}); + +/// Allocates shared USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclDevice the device to use for the allocation. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +template +T *malloc_shared(std::size_t count, const device &syclDevice, + const context &syclContext, + const property_list &propList = {}) { + // TODO: to rewrite with aligned_malloc_shared once it's supported in + // liboffload. + return static_cast( + malloc_shared(count * sizeof(T), syclDevice, syclContext, propList)); +} + +/// Allocates shared USM. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +void *_LIBSYCL_EXPORT malloc_shared(std::size_t numBytes, + const queue &syclQueue, + const property_list &propList = {}); + +/// Allocates shared USM. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. +template +T *malloc_shared(std::size_t count, const queue &syclQueue, + const property_list &propList = {}) { + return malloc_shared(count, syclQueue.get_device(), + syclQueue.get_context(), propList); +} +/// @} + +/// \name SYCL 2020 4.8.3.5. Parameterized allocation functions. +/// @{ +/// Allocates USM of type `kind`. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclDevice the device to use for the allocation. The syclDevice +/// parameter is ignored if kind is usm::alloc::host. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param kind the type of memory to allocate. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. If there are +/// not enough resources to allocate the requested memory, these functions +/// return nullptr. +void *_LIBSYCL_EXPORT malloc(std::size_t numBytes, const device &syclDevice, + const context &syclContext, usm::alloc kind, + const property_list &propList = {}); + +/// Allocates USM of type `kind`. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclDevice the device to use for the allocation. The syclDevice +/// parameter is ignored if kind is usm::alloc::host. +/// \param syclContext a context containing syclDevice or its parent device if +/// syclDevice is a subdevice. +/// \param kind the type of memory to allocate. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. If there are +/// not enough resources to allocate the requested memory, these functions +/// return nullptr. +template +T *malloc(std::size_t count, const device &syclDevice, + const context &syclContext, usm::alloc kind, + const property_list &propList = {}) { + // TODO: to rewrite with aligned_malloc once it's supported in liboffload. + return static_cast( + malloc(count * sizeof(T), syclDevice, syclContext, kind, propList)); +} + +/// Allocates USM of type `kind`. +/// +/// \param numBytes the number of bytes to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param kind the type of memory to allocate. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. If there are +/// not enough resources to allocate the requested memory, these functions +/// return nullptr. +void *_LIBSYCL_EXPORT malloc(std::size_t numBytes, const queue &syclQueue, + usm::alloc kind, + const property_list &propList = {}); + +/// Allocates USM of type `kind`. +/// +/// \param count the number of elements of type T to allocate. +/// \param syclQueue a queue that provides the device and context. +/// \param kind the type of memory to allocate. +/// \param propList the list of properties for the allocation. +/// \return a pointer to the newly allocated memory, which must eventually be +/// deallocated with sycl::free in order to avoid a memory leak. If there are +/// not enough resources to allocate the requested memory, these functions +/// return nullptr. +template +T *malloc(std::size_t count, const queue &syclQueue, usm::alloc kind, + const property_list &propList = {}) { + return malloc(count, syclQueue.get_device(), syclQueue.get_context(), kind, + propList); +} +/// @} + +/// \name SYCL 2020 4.8.3.6. Memory deallocation functions. +/// @{ +/// Deallocate USM of any kind. +/// +/// \param ptr a pointer that satisfies the following preconditions: points to +/// memory allocated against ctxt using one of the USM allocation routines, or +/// is a null pointer; ptr has not previously been deallocated; there are no +/// in-progress or enqueued commands using the memory pointed to by ptr. +/// \param ctxt the context that is associated with ptr. +void _LIBSYCL_EXPORT free(void *ptr, const context &ctxt); + +/// Deallocate USM of any kind. +/// +/// Equivalent to free(ptr, q.get_context()). +/// +/// \param ptr a pointer that satisfies the following preconditions: points to +/// memory allocated against ctxt using one of the USM allocation routines, or +/// is a null pointer; ptr has not previously been deallocated; there are no +/// in-progress or enqueued commands using the memory pointed to by ptr. +/// \param q a queue to determine the context associated with ptr. +void _LIBSYCL_EXPORT free(void *ptr, const queue &q); +/// @} + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_USM_FUNCTIONS_HPP diff --git a/libsycl/include/sycl/sycl.hpp b/libsycl/include/sycl/sycl.hpp index e1bd55e36156..3fcf088f4553 100644 --- a/libsycl/include/sycl/sycl.hpp +++ b/libsycl/include/sycl/sycl.hpp @@ -20,5 +20,6 @@ #include #include #include +#include #endif // _LIBSYCL_SYCL_HPP diff --git a/libsycl/src/CMakeLists.txt b/libsycl/src/CMakeLists.txt index 1e4e4178bd66..67ba7d28968d 100644 --- a/libsycl/src/CMakeLists.txt +++ b/libsycl/src/CMakeLists.txt @@ -88,6 +88,7 @@ set(LIBSYCL_SOURCES "device_selector.cpp" "platform.cpp" "queue.cpp" + "usm_functions.cpp" "detail/context_impl.cpp" "detail/device_impl.cpp" "detail/global_objects.cpp" diff --git a/libsycl/src/detail/device_impl.cpp b/libsycl/src/detail/device_impl.cpp index d12f97d0db86..023b97c14452 100644 --- a/libsycl/src/detail/device_impl.cpp +++ b/libsycl/src/detail/device_impl.cpp @@ -25,6 +25,12 @@ bool DeviceImpl::has(aspect Aspect) const { case (aspect::emulated): case (aspect::host_debuggable): return false; + case (aspect::usm_device_allocations): + case (aspect::usm_host_allocations): + case (aspect::usm_shared_allocations): + // liboffload works with USM only and has no query to check support. We + // assume that USM is always supported. + return true; default: // Other aspects are not implemented yet return false; diff --git a/libsycl/src/detail/global_objects.cpp b/libsycl/src/detail/global_objects.cpp index 35e32985e7cb..d80be710268f 100644 --- a/libsycl/src/detail/global_objects.cpp +++ b/libsycl/src/detail/global_objects.cpp @@ -53,3 +53,6 @@ std::vector &getPlatformCache() { } // namespace detail _LIBSYCL_END_NAMESPACE_SYCL + +extern "C" void __sycl_register_lib(void *) {} +extern "C" void __sycl_unregister_lib(void *) {} diff --git a/libsycl/src/detail/global_objects.hpp b/libsycl/src/detail/global_objects.hpp index 4535a254c660..008cb01f4f35 100644 --- a/libsycl/src/detail/global_objects.hpp +++ b/libsycl/src/detail/global_objects.hpp @@ -16,6 +16,19 @@ #include #include +// +++ Entry points referenced by the offload wrapper object { + +/// Executed as a part of current module's (.exe, .dll) static initialization. +/// Registers device executable images with the runtime. +extern "C" _LIBSYCL_EXPORT void __sycl_register_lib(void *); + +/// Executed as a part of current module's (.exe, .dll) static +/// de-initialization. +/// Unregisters device executable images with the runtime. +extern "C" _LIBSYCL_EXPORT void __sycl_unregister_lib(void *); + +// +++ } + _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { diff --git a/libsycl/src/detail/offload/offload_utils.cpp b/libsycl/src/detail/offload/offload_utils.cpp index 9a2609daddce..594c41f9e965 100644 --- a/libsycl/src/detail/offload/offload_utils.cpp +++ b/libsycl/src/detail/offload/offload_utils.cpp @@ -88,5 +88,21 @@ info::device_type convertDeviceTypeToSYCL(ol_device_type_t DeviceType) { } } +ol_alloc_type_t getOlAllocType(usm::alloc USMKind) { + switch (USMKind) { + case usm::alloc::host: + return OL_ALLOC_TYPE_HOST; + case usm::alloc::device: + return OL_ALLOC_TYPE_DEVICE; + case usm::alloc::shared: + return OL_ALLOC_TYPE_MANAGED; + case usm::alloc::unknown: + // usm::alloc::unknown can be returned to user from get_pointer_type but it + // can't be converted to a valid backend type. + throw exception(sycl::make_error_code(sycl::errc::runtime), + "USM kind is not supported"); + } +} + } // namespace detail _LIBSYCL_END_NAMESPACE_SYCL diff --git a/libsycl/src/detail/offload/offload_utils.hpp b/libsycl/src/detail/offload/offload_utils.hpp index e849ee137337..0df9f5aaffe1 100644 --- a/libsycl/src/detail/offload/offload_utils.hpp +++ b/libsycl/src/detail/offload/offload_utils.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -102,6 +103,13 @@ ol_device_type_t convertDeviceTypeToOL(info::device_type DeviceType); /// \returns SYCL device type matching specified liboffload device type. info::device_type convertDeviceTypeToSYCL(ol_device_type_t DeviceType); +/// Converts a SYCL USM kind to a liboffload type. +/// +/// \param USMKind a SYCL USM kind. +/// +/// \returns ol_alloc_type_t matching the specified SYCL USM kind. +ol_alloc_type_t getOlAllocType(usm::alloc USMKind); + /// Helper to map SYCL information descriptors to OL__INFO_. /// /// Typical usage: diff --git a/libsycl/src/ld-version-script.txt b/libsycl/src/ld-version-script.txt index a347d202a367..eeb78e2cf59b 100644 --- a/libsycl/src/ld-version-script.txt +++ b/libsycl/src/ld-version-script.txt @@ -15,6 +15,10 @@ _ZTSN4sycl*; /* typeinfo name */ _ZTVN4sycl*; /* vtable */ + /* Export offload image hooks */ + __sycl_register_lib; + __sycl_unregister_lib; + local: *; }; diff --git a/libsycl/src/usm_functions.cpp b/libsycl/src/usm_functions.cpp new file mode 100644 index 000000000000..24a099ea4cf2 --- /dev/null +++ b/libsycl/src/usm_functions.cpp @@ -0,0 +1,126 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include + +#include + +#include + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +// SYCL 2020 4.8.3.2. Device allocation functions. + +void *malloc_device(std::size_t numBytes, const device &syclDevice, + const context &syclContext, const property_list &propList) { + return malloc(numBytes, syclDevice, syclContext, usm::alloc::device, + propList); +} + +void *malloc_device(std::size_t numBytes, const queue &syclQueue, + const property_list &propList) { + return malloc_device(numBytes, syclQueue.get_device(), + syclQueue.get_context(), propList); +} + +// SYCL 2020 4.8.3.3. Host allocation functions. + +void *malloc_host(std::size_t numBytes, const context &syclContext, + const property_list &propList) { + auto ContextDevices = syclContext.get_devices(); + assert(!ContextDevices.empty() && "Context can't be created without device"); + if (std::none_of( + ContextDevices.begin(), ContextDevices.end(), + [](device Dev) { return Dev.has(aspect::usm_host_allocations); })) + throw sycl::exception( + sycl::errc::feature_not_supported, + "All devices of context do not support host USM allocations."); + return malloc(numBytes, ContextDevices[0], syclContext, usm::alloc::host, + propList); +} + +void *malloc_host(std::size_t numBytes, const queue &syclQueue, + const property_list &propList) { + return malloc_host(numBytes, syclQueue.get_context(), propList); +} + +// SYCL 2020 4.8.3.4. Shared allocation functions. + +void *malloc_shared(std::size_t numBytes, const device &syclDevice, + const context &syclContext, const property_list &propList) { + return malloc(numBytes, syclDevice, syclContext, usm::alloc::shared, + propList); +} + +void *malloc_shared(std::size_t numBytes, const queue &syclQueue, + const property_list &propList) { + return malloc_shared(numBytes, syclQueue.get_device(), + syclQueue.get_context(), propList); +} + +// SYCL 2020 4.8.3.5. Parameterized allocation functions. + +static aspect getAspectByAllocationKind(usm::alloc kind) { + switch (kind) { + case usm::alloc::host: + return aspect::usm_host_allocations; + case usm::alloc::device: + return aspect::usm_device_allocations; + case usm::alloc::shared: + return aspect::usm_shared_allocations; + case usm::alloc::unknown: + // usm::alloc::unknown can be returned to user from get_pointer_type but + // it can't be converted to a valid backend type. + throw exception(sycl::make_error_code(sycl::errc::invalid), + "Invalid USM allocation kind requested"); + } +} + +void *malloc(std::size_t numBytes, const device &syclDevice, + const context &syclContext, usm::alloc kind, + const property_list &propList) { + auto ContextDevices = syclContext.get_devices(); + assert(!ContextDevices.empty() && "Context can't be created without device"); + if (std::none_of(ContextDevices.begin(), ContextDevices.end(), + [&syclDevice](device Dev) { return Dev == syclDevice; })) + throw exception(make_error_code(errc::invalid), + "Specified device is not contained by specified context."); + if (!syclDevice.has(getAspectByAllocationKind(kind))) + throw sycl::exception( + sycl::errc::feature_not_supported, + "Device doesn't support requested kind of USM allocation"); + + if (!numBytes) + return nullptr; + + void *Ptr{}; + auto Result = detail::callNoCheck( + olMemAlloc, detail::getSyclObjImpl(syclDevice)->getOLHandle(), + detail::getOlAllocType(kind), numBytes, &Ptr); + return detail::isFailed(Result) ? nullptr : Ptr; +} + +void *malloc(std::size_t numBytes, const queue &syclQueue, usm::alloc kind, + const property_list &propList) { + return malloc(numBytes, syclQueue.get_device(), syclQueue.get_context(), kind, + propList); +} + +// SYCL 2020 4.8.3.6. Memory deallocation functions. + +void free(void *ptr, const context &ctxt) { + std::ignore = ctxt; + detail::callAndThrow(olMemFree, ptr); +} + +void free(void *ptr, const queue &q) { return free(ptr, q.get_context()); } + +_LIBSYCL_END_NAMESPACE_SYCL diff --git a/libsycl/test/usm/alloc_functions.cpp b/libsycl/test/usm/alloc_functions.cpp new file mode 100644 index 000000000000..234f5cbd433b --- /dev/null +++ b/libsycl/test/usm/alloc_functions.cpp @@ -0,0 +1,124 @@ +// REQUIRES: any-device +// RUN: %clangxx %sycl_options %s -o %t.out +// RUN: %t.out + +#include + +#include +#include +#include + +using namespace sycl; + +constexpr size_t Align = 256; + +struct alignas(Align) Aligned { + int x; +}; + +int main() { + queue q; + context ctx = q.get_context(); + device d = q.get_device(); + + auto check = [&q](size_t Alignment, auto AllocFn, int Line = __builtin_LINE(), + int Case = 0) { + // First allocation might naturally be over-aligned. Do several of them to + // do the verification; + decltype(AllocFn()) Arr[10]; + for (auto *&Elem : Arr) + Elem = AllocFn(); + for (auto *Ptr : Arr) { + auto v = reinterpret_cast(Ptr); + if ((v & (Alignment - 1)) != 0) { + std::cout << "Failed at line " << Line << ", case " << Case + << std::endl; + assert(false && "Not properly aligned!"); + break; // To be used with commented out assert above. + } + } + for (auto *Ptr : Arr) + free(Ptr, q); + }; + + // The strictest (largest) fundamental alignment of any type is the alignment + // of max_align_t. This is, however, smaller than the minimal alignment + // returned by the underlying runtime as of now. + constexpr size_t FAlign = alignof(std::max_align_t); + + auto CheckAll = [&](size_t Expected, auto Funcs, + int Line = __builtin_LINE()) { + std::apply( + [&](auto... Fs) { + int Case = 0; + (void)std::initializer_list{ + (check(Expected, Fs, Line, Case++), 0)...}; + }, + Funcs); + }; + + auto MDevice = [&](auto... args) { + return malloc_device(sizeof(std::max_align_t), args...); + }; + CheckAll(FAlign, + std::tuple{[&]() { return MDevice(q); }, + [&]() { return MDevice(d, ctx); }, + [&]() { return MDevice(q, property_list{}); }, + [&]() { return MDevice(d, ctx, property_list{}); }}); + + auto MHost = [&](auto... args) { + return malloc_host(sizeof(std::max_align_t), args...); + }; + CheckAll(FAlign, + std::tuple{[&]() { return MHost(q); }, [&]() { return MHost(ctx); }, + [&]() { return MHost(q, property_list{}); }, + [&]() { return MHost(ctx, property_list{}); }}); + + if (d.has(aspect::usm_shared_allocations)) { + auto MShared = [&](auto... args) { + return malloc_shared(sizeof(std::max_align_t), args...); + }; + + CheckAll(FAlign, + std::tuple{[&]() { return MShared(q); }, + [&]() { return MShared(d, ctx); }, + [&]() { return MShared(q, property_list{}); }, + [&]() { return MShared(d, ctx, property_list{}); }}); + } + + auto TDevice = [&](auto... args) { + return malloc_device(1, args...); + }; + CheckAll(Align, std::tuple{[&]() { return TDevice(q); }, + [&]() { return TDevice(d, ctx); }}); + + auto THost = [&](auto... args) { return malloc_host(1, args...); }; + CheckAll(Align, std::tuple{[&]() { return THost(q); }, + [&]() { return THost(ctx); }}); + + if (d.has(aspect::usm_shared_allocations)) { + auto TShared = [&](auto... args) { + return malloc_shared(1, args...); + }; + CheckAll(Align, std::tuple{[&]() { return TShared(q); }, + [&]() { return TShared(d, ctx); }}); + } + + auto Malloc = [&](auto... args) { + return malloc(sizeof(std::max_align_t), args...); + }; + CheckAll( + FAlign, + std::tuple{ + [&]() { return Malloc(q, usm::alloc::host); }, + [&]() { return Malloc(d, ctx, usm::alloc::host); }, + [&]() { return Malloc(q, usm::alloc::host, property_list{}); }, + [&]() { return Malloc(d, ctx, usm::alloc::host, property_list{}); }}); + + auto TMalloc = [&](auto... args) { return malloc(1, args...); }; + CheckAll(Align, + std::tuple{[&]() { return TMalloc(q, usm::alloc::host); }, + [&]() { return TMalloc(d, ctx, usm::alloc::host); }}); + + return 0; +}