Remove unused parallel-libs project
Differential Revision: https://reviews.llvm.org/D112265
This commit is contained in:
parent
458ed5fcc3
commit
db0486c46f
@ -66,8 +66,7 @@ This is an example work-flow and configuration to get and build the LLVM source:
|
||||
* ``-DLLVM_ENABLE_PROJECTS='...'`` --- semicolon-separated list of the LLVM
|
||||
sub-projects you'd like to additionally build. Can include any of: clang,
|
||||
clang-tools-extra, compiler-rt,cross-project-tests, flang, libc, libclc,
|
||||
libcxx, libcxxabi, libunwind, lld, lldb, mlir, openmp, parallel-libs,
|
||||
polly, or pstl.
|
||||
libcxx, libcxxabi, libunwind, lld, lldb, mlir, openmp, polly, or pstl.
|
||||
|
||||
For example, to build LLVM, Clang, libcxx, and libcxxabi, use
|
||||
``-DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"``.
|
||||
|
@ -64,7 +64,7 @@ endif()
|
||||
# LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS
|
||||
# This allows an easy way of setting up a build directory for llvm and another
|
||||
# one for llvm+clang+... using the same sources.
|
||||
set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;parallel-libs;polly;pstl")
|
||||
set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;polly;pstl")
|
||||
# The flang project is not yet part of "all" projects (see C++ requirements)
|
||||
set(LLVM_EXTRA_PROJECTS "flang")
|
||||
# List of all known projects in the mono repo
|
||||
|
@ -491,7 +491,7 @@ enabled sub-projects. Nearly all of these variable names begin with
|
||||
This feature allows to have one build for only LLVM and another for clang+llvm
|
||||
using the same source checkout.
|
||||
The full list is:
|
||||
``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;parallel-libs;polly;pstl``
|
||||
``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;polly;pstl``
|
||||
|
||||
**LLVM_ENABLE_RUNTIMES**:STRING
|
||||
Build libc++, libc++abi or other projects using that a just-built compiler.
|
||||
|
@ -10,7 +10,6 @@ foreach(entry ${entries})
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxxabi) AND
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite) AND
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/parallel-libs) AND
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp) AND
|
||||
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cross-project-tests))
|
||||
get_filename_component(entry_name "${entry}" NAME)
|
||||
@ -41,7 +40,6 @@ if(${LLVM_BUILD_RUNTIME})
|
||||
endif()
|
||||
|
||||
add_llvm_external_project(dragonegg)
|
||||
add_llvm_external_project(parallel-libs)
|
||||
add_llvm_external_project(openmp)
|
||||
|
||||
if(LLVM_INCLUDE_TESTS)
|
||||
|
@ -69,7 +69,7 @@ def CreateLLVMProjects(single_tree_checkout):
|
||||
# Projects that reside inside 'projects/' in a single source tree checkout.
|
||||
ORDINARY_PROJECTS = [
|
||||
"compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind",
|
||||
"parallel-libs", "test-suite"
|
||||
"test-suite"
|
||||
]
|
||||
# Projects that reside inside 'tools/' in a single source tree checkout.
|
||||
TOOLS_PROJECTS = ["clang", "lld", "lldb"]
|
||||
|
@ -1,2 +0,0 @@
|
||||
BasedOnStyle: LLVM
|
||||
|
@ -1,17 +0,0 @@
|
||||
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,readability-identifier-naming'
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.EnumCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: lowerCase
|
||||
- key: readability-identifier-naming.MemberCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.ParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UnionCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: CamelCase
|
||||
|
@ -1 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.13.4)
|
@ -1,90 +0,0 @@
|
||||
=====================================================
|
||||
LLVM parallel-libs Subproject Charter
|
||||
=====================================================
|
||||
|
||||
----------------------------------------------
|
||||
Description
|
||||
----------------------------------------------
|
||||
The LLVM open source project will contain a subproject named `parallel-libs`
|
||||
which will host the development of libraries which are aimed at enabling
|
||||
parallelism in code and which are also closely tied to compiler technology.
|
||||
Examples of libraries suitable for hosting within the `parallel-libs`
|
||||
subproject are runtime libraries and parallel math libraries. The initial
|
||||
candidates for inclusion in this subproject are **StreamExecutor** and
|
||||
**libomptarget** which would live in the `streamexecutor` and `libomptarget`
|
||||
subdirectories of `parallel-libs`, respectively.
|
||||
|
||||
The `parallel-libs` project will host a collection of libraries where each
|
||||
library may be dependent on other libraries from the project or may be
|
||||
completely independent of any other libraries in the project. The rationale for
|
||||
hosting independent libraries within the same subproject is that all libraries
|
||||
in the project are providing related functionality that lives at the
|
||||
intersection of parallelism and compiler technology. It is expected that some
|
||||
libraries which initially began as independent will develop dependencies over
|
||||
time either between existing libraries or by extracting common code that can be
|
||||
used by each. One of the purposes of this subproject is to provide a working
|
||||
space where such refactoring and code sharing can take place.
|
||||
|
||||
Libraries in the `parallel-libs` subproject may also depend on the LLVM core
|
||||
libraries. This will be useful for avoiding duplication of code within the LLVM
|
||||
project for common utilities such as those found in the LLVM support library.
|
||||
|
||||
|
||||
----------------------------------------------
|
||||
Requirements
|
||||
----------------------------------------------
|
||||
Libraries included in the `parallel-libs` subproject must strive to achieve the
|
||||
following requirements:
|
||||
|
||||
1. Adhere to the LLVM coding standards.
|
||||
2. Use the LLVM build and test infrastructure.
|
||||
3. Be released under LLVM's license.
|
||||
|
||||
|
||||
Coding standards
|
||||
----------------
|
||||
Libraries in `parallel-libs` will match the LLVM coding standards. For existing
|
||||
projects being checked into the subproject as-is, an exception will be made
|
||||
during the initial check-in, with the understanding that the code will be
|
||||
promptly updated to follow the standards. Therefore, a three month grace period
|
||||
will be allowed for new libraries to meet the LLVM coding standards.
|
||||
|
||||
Additional exceptions to strict adherence to the LLVM coding standards may be
|
||||
allowed in certain other cases, but the reasons for such exceptions must be
|
||||
discussed and documented on a case-by-case basis.
|
||||
|
||||
|
||||
LLVM build and test infrastructure
|
||||
----------------------------------
|
||||
Using the LLVM build and test infrastructure currently means using `cmake` for
|
||||
building, `lit` for testing, and `buildbot` for automating build and testing.
|
||||
This project will follow the main LLVM project conventions here and track them
|
||||
as they evolve.
|
||||
|
||||
Each subproject library will be able to build separately without a single,
|
||||
unified cmake file, but each subproject libraries will also be integrated into
|
||||
the LLVM build so they can be built directly from the top level of the LLVM
|
||||
cmake infrastructure.
|
||||
|
||||
|
||||
LLVM license
|
||||
------------
|
||||
For simplicity, the `parallel-libs` project will use the normal LLVM license.
|
||||
While some runtime libraries use a dual license scheme in LLVM, we anticipate
|
||||
the project removing the need for this eventually and in the interim follow the
|
||||
simpler but still permissive license. Among other things, this makes it
|
||||
straightforward for these libraries to re-use core LLVM libraries where
|
||||
appropriate.
|
||||
|
||||
|
||||
----------------------------------------------
|
||||
Mailing List and Bugs
|
||||
----------------------------------------------
|
||||
Two mailing lists will be set up for the project:
|
||||
|
||||
1. parallel_libs-dev@lists.llvm.org for discussions among project developers, and
|
||||
2. parallel_libs-commits@lists.llvm.org for patches and commits to the project.
|
||||
|
||||
Each subproject library will manage its own components in Bugzilla. So, for
|
||||
example, there can be several Bugzilla components for different parts of
|
||||
StreamExecutor, etc.
|
@ -1 +0,0 @@
|
||||
BasedOnStyle: LLVM
|
@ -1,17 +0,0 @@
|
||||
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,readability-identifier-naming'
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.EnumCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.MemberCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.ParameterCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.UnionCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: CamelCase
|
||||
|
@ -1,79 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.13.4)
|
||||
|
||||
option(ACXXEL_ENABLE_UNIT_TESTS "enable acxxel unit tests" ON)
|
||||
option(ACXXEL_ENABLE_MULTI_DEVICE_UNIT_TESTS "enable acxxel multi-device unit tests" OFF)
|
||||
option(ACXXEL_ENABLE_EXAMPLES "enable acxxel examples" OFF)
|
||||
option(ACXXEL_ENABLE_DOXYGEN "enable Doxygen for acxxel" OFF)
|
||||
option(ACXXEL_ENABLE_CUDA "enable CUDA for acxxel" ON)
|
||||
option(ACXXEL_ENABLE_OPENCL "enable OpenCL for acxxel" ON)
|
||||
|
||||
project(acxxel)
|
||||
|
||||
if(ACXXEL_ENABLE_CUDA)
|
||||
find_package(CUDA REQUIRED)
|
||||
include_directories(${CUDA_INCLUDE_DIRS})
|
||||
find_library(CUDA_DRIVER_LIBRARY cuda)
|
||||
if(NOT CUDA_DRIVER_LIBRARY)
|
||||
message(FATAL_ERROR "could not find libcuda, is the CUDA driver installed on your system?")
|
||||
endif(NOT CUDA_DRIVER_LIBRARY)
|
||||
set(ACXXEL_CUDA_SOURCES cuda_acxxel.cpp)
|
||||
set(ACXXEL_CUDA_LIBRARIES ${CUDA_DRIVER_LIBRARY} ${CUDA_LIBRARIES})
|
||||
endif(ACXXEL_ENABLE_CUDA)
|
||||
|
||||
if(ACXXEL_ENABLE_OPENCL)
|
||||
find_package(OpenCL REQUIRED)
|
||||
include_directories(${OpenCL_INCLUDE_DIRS})
|
||||
set(ACXXEL_OPENCL_SOURCES opencl_acxxel.cpp)
|
||||
set(ACXXEL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
|
||||
endif()
|
||||
|
||||
configure_file(config.h.in config.h)
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
# Insist on C++ 11 features.
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Add warning flags.
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
|
||||
if (CXX_SUPPORTS_SUGGEST_OVERRIDE_FLAG)
|
||||
add_compile_options("-Wno-suggest-override")
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
acxxel
|
||||
acxxel.cpp
|
||||
${ACXXEL_CUDA_SOURCES}
|
||||
${ACXXEL_OPENCL_SOURCES})
|
||||
target_link_libraries(
|
||||
acxxel
|
||||
${ACXXEL_CUDA_LIBRARIES}
|
||||
${ACXXEL_OPENCL_LIBRARIES})
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
if(ACXXEL_ENABLE_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
||||
|
||||
if(ACXXEL_ENABLE_UNIT_TESTS)
|
||||
enable_testing()
|
||||
find_package(GTest REQUIRED)
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
find_package(Threads)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
if(ACXXEL_ENABLE_DOXYGEN)
|
||||
find_package(Doxygen REQUIRED)
|
||||
configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
|
||||
add_custom_target(
|
||||
acxxel-doc
|
||||
${DOXYGEN_EXECUTABLE}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
|
||||
WORKING_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMENT
|
||||
"Generating acxxel API documentation with Doxygen"
|
||||
VERBATIM)
|
||||
endif()
|
File diff suppressed because it is too large
Load Diff
@ -1,278 +0,0 @@
|
||||
==============================================================================
|
||||
The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
|
||||
==============================================================================
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
---- LLVM Exceptions to the Apache 2.0 License ----
|
||||
|
||||
As an exception, if, as a result of your compiling your source code, portions
|
||||
of this Software are embedded into an Object form of such source code, you
|
||||
may redistribute such embedded portions in such Object form without complying
|
||||
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
|
||||
|
||||
In addition, if you combine or link compiled forms of this Software with
|
||||
software that is licensed under the GPLv2 ("Combined Software") and if a
|
||||
court of competent jurisdiction determines that the patent provision (Section
|
||||
3), the indemnity provision (Section 9) or other Section of the License
|
||||
conflicts with the conditions of the GPLv2, you may retroactively and
|
||||
prospectively choose to deem waived or otherwise exclude such Section(s) of
|
||||
the License, but only in their entirety and only with respect to the Combined
|
||||
Software.
|
||||
|
||||
==============================================================================
|
||||
Software from third parties included in the LLVM Project:
|
||||
==============================================================================
|
||||
The LLVM Project contains third party software which is under different license
|
||||
terms. All such code will be identified clearly using at least one of two
|
||||
mechanisms:
|
||||
1) It will be in a separate directory tree with its own `LICENSE.txt` or
|
||||
`LICENSE` file at the top containing the specific license and restrictions
|
||||
which apply to that software, or
|
||||
2) It will contain specific license and restriction terms at the top of every
|
||||
file.
|
||||
|
||||
==============================================================================
|
||||
Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
|
||||
==============================================================================
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2007-2019 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
LLVM Team
|
||||
|
||||
University of Illinois at Urbana-Champaign
|
||||
|
||||
http://llvm.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal with
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the names of the LLVM Team, University of Illinois at
|
||||
Urbana-Champaign, nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this Software without specific
|
||||
prior written permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
@ -1,111 +0,0 @@
|
||||
//===--- acxxel.cpp - Implementation details for the Acxxel API -----------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
#include "config.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace acxxel {
|
||||
|
||||
namespace cuda {
|
||||
Expected<Platform *> getPlatform();
|
||||
} // namespace cuda
|
||||
|
||||
namespace opencl {
|
||||
Expected<Platform *> getPlatform();
|
||||
} // namespace opencl
|
||||
|
||||
void logWarning(const std::string &Message) {
|
||||
std::cerr << "WARNING: " << Message << "\n";
|
||||
}
|
||||
|
||||
Expected<Platform *> getCUDAPlatform() {
|
||||
#ifdef ACXXEL_ENABLE_CUDA
|
||||
return cuda::getPlatform();
|
||||
#else
|
||||
return Status("library was build without CUDA support");
|
||||
#endif
|
||||
}
|
||||
|
||||
Expected<Platform *> getOpenCLPlatform() {
|
||||
#ifdef ACXXEL_ENABLE_OPENCL
|
||||
return opencl::getPlatform();
|
||||
#else
|
||||
return Status("library was build without OpenCL support");
|
||||
#endif
|
||||
}
|
||||
|
||||
Stream::Stream(Stream &&) noexcept = default;
|
||||
Stream &Stream::operator=(Stream &&) noexcept = default;
|
||||
|
||||
Status Stream::sync() {
|
||||
return takeStatusOr(ThePlatform->streamSync(TheHandle.get()));
|
||||
}
|
||||
|
||||
Status Stream::waitOnEvent(Event &Event) {
|
||||
return takeStatusOr(ThePlatform->streamWaitOnEvent(
|
||||
TheHandle.get(), ThePlatform->getEventHandle(Event)));
|
||||
}
|
||||
|
||||
Stream &
|
||||
Stream::addCallback(std::function<void(Stream &, const Status &)> Callback) {
|
||||
setStatus(ThePlatform->addStreamCallback(*this, std::move(Callback)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::asyncKernelLaunch(const Kernel &TheKernel,
|
||||
KernelLaunchDimensions LaunchDimensions,
|
||||
Span<void *> Arguments,
|
||||
Span<size_t> ArgumentSizes,
|
||||
size_t SharedMemoryBytes) {
|
||||
setStatus(ThePlatform->rawEnqueueKernelLaunch(
|
||||
TheHandle.get(), TheKernel.TheHandle.get(), LaunchDimensions, Arguments,
|
||||
ArgumentSizes, SharedMemoryBytes));
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::enqueueEvent(Event &E) {
|
||||
setStatus(ThePlatform->enqueueEvent(ThePlatform->getEventHandle(E),
|
||||
TheHandle.get()));
|
||||
return *this;
|
||||
}
|
||||
|
||||
Event::Event(Event &&) noexcept = default;
|
||||
Event &Event::operator=(Event &&) noexcept = default;
|
||||
|
||||
bool Event::isDone() { return ThePlatform->eventIsDone(TheHandle.get()); }
|
||||
|
||||
Status Event::sync() { return ThePlatform->eventSync(TheHandle.get()); }
|
||||
|
||||
Expected<float> Event::getSecondsSince(const Event &Previous) {
|
||||
Expected<float> MaybeSeconds = ThePlatform->getSecondsBetweenEvents(
|
||||
Previous.TheHandle.get(), TheHandle.get());
|
||||
if (MaybeSeconds.isError())
|
||||
MaybeSeconds.getError();
|
||||
return MaybeSeconds;
|
||||
}
|
||||
|
||||
Expected<Kernel> Program::createKernel(const std::string &Name) {
|
||||
Expected<void *> MaybeKernelHandle =
|
||||
ThePlatform->rawCreateKernel(TheHandle.get(), Name);
|
||||
if (MaybeKernelHandle.isError())
|
||||
return MaybeKernelHandle.getError();
|
||||
return Kernel(ThePlatform, MaybeKernelHandle.getValue(),
|
||||
ThePlatform->getKernelHandleDestructor());
|
||||
}
|
||||
|
||||
Program::Program(Program &&) noexcept = default;
|
||||
Program &Program::operator=(Program &&That) noexcept = default;
|
||||
|
||||
Kernel::Kernel(Kernel &&) noexcept = default;
|
||||
Kernel &Kernel::operator=(Kernel &&That) noexcept = default;
|
||||
|
||||
} // namespace acxxel
|
File diff suppressed because it is too large
Load Diff
@ -1,15 +0,0 @@
|
||||
//===--- config.h - Macros generated during configuration -------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file declares macros that are generated during the configuration stage
|
||||
/// of the build.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#cmakedefine ACXXEL_ENABLE_CUDA
|
||||
#cmakedefine ACXXEL_ENABLE_OPENCL
|
@ -1,510 +0,0 @@
|
||||
//===--- cuda_acxxel.cpp - CUDA implementation of the Acxxel API ----------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file defines the standard CUDA implementation of the Acxxel API.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
|
||||
#include "cuda.h"
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace acxxel {
|
||||
|
||||
namespace {
|
||||
|
||||
static std::string getCUErrorMessage(CUresult Result) {
|
||||
if (!Result)
|
||||
return "success";
|
||||
const char *ErrorName = "UNKNOWN_ERROR_NAME";
|
||||
const char *ErrorDescription = "UNKNOWN_ERROR_DESCRIPTION";
|
||||
cuGetErrorName(Result, &ErrorName);
|
||||
cuGetErrorString(Result, &ErrorDescription);
|
||||
std::ostringstream OutStream;
|
||||
OutStream << "CUDA driver error: code = " << Result
|
||||
<< ", name = " << ErrorName
|
||||
<< ", description = " << ErrorDescription;
|
||||
return OutStream.str();
|
||||
}
|
||||
|
||||
static Status getCUError(CUresult Result, const std::string &Message) {
|
||||
if (!Result)
|
||||
return Status();
|
||||
std::ostringstream OutStream;
|
||||
OutStream << getCUErrorMessage(Result) << ", message = " << Message;
|
||||
return Status(OutStream.str());
|
||||
}
|
||||
|
||||
static std::string getCUDAErrorMessage(cudaError_t E) {
|
||||
if (!E)
|
||||
return "success";
|
||||
std::ostringstream OutStream;
|
||||
OutStream << "CUDA runtime error: code = " << E
|
||||
<< ", name = " << cudaGetErrorName(E)
|
||||
<< ", description = " << cudaGetErrorString(E);
|
||||
return OutStream.str();
|
||||
}
|
||||
|
||||
static Status getCUDAError(cudaError_t E, const std::string &Message) {
|
||||
if (!E)
|
||||
return Status();
|
||||
std::ostringstream OutStream;
|
||||
OutStream << getCUDAErrorMessage(E) << ", message = " << Message;
|
||||
return Status(OutStream.str());
|
||||
}
|
||||
|
||||
static void logCUWarning(CUresult Result, const std::string &Message) {
|
||||
if (Result) {
|
||||
std::ostringstream OutStream;
|
||||
OutStream << Message << ": " << getCUErrorMessage(Result);
|
||||
logWarning(OutStream.str());
|
||||
}
|
||||
}
|
||||
|
||||
/// A CUDA Platform implementation.
|
||||
class CUDAPlatform : public Platform {
|
||||
public:
|
||||
~CUDAPlatform() override = default;
|
||||
|
||||
static Expected<CUDAPlatform> create();
|
||||
|
||||
Expected<int> getDeviceCount() override;
|
||||
|
||||
Expected<Stream> createStream(int DeviceIndex) override;
|
||||
|
||||
Status streamSync(void *Stream) override;
|
||||
|
||||
Status streamWaitOnEvent(void *Stream, void *Event) override;
|
||||
|
||||
Expected<Event> createEvent(int DeviceIndex) override;
|
||||
|
||||
protected:
|
||||
Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
|
||||
HandleDestructor getDeviceMemoryHandleDestructor() override;
|
||||
void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
|
||||
size_t ByteOffset) override;
|
||||
virtual void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
|
||||
|
||||
Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
|
||||
int DeviceIndex) override;
|
||||
Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
|
||||
int DeviceIndex) override;
|
||||
|
||||
Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
|
||||
HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
|
||||
|
||||
Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
|
||||
HandleDestructor getFreeHostMemoryHandleDestructor() override;
|
||||
|
||||
Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
||||
void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) override;
|
||||
Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
||||
void *HostDst, ptrdiff_t ByteCount,
|
||||
void *Stream) override;
|
||||
Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
|
||||
void *Stream) override;
|
||||
|
||||
Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
||||
ptrdiff_t ByteCount, char ByteValue,
|
||||
void *Stream) override;
|
||||
|
||||
Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
|
||||
|
||||
Expected<Program> createProgramFromSource(Span<const char> Source,
|
||||
int DeviceIndex) override;
|
||||
|
||||
Status enqueueEvent(void *Event, void *Stream) override;
|
||||
bool eventIsDone(void *Event) override;
|
||||
Status eventSync(void *Event) override;
|
||||
Expected<float> getSecondsBetweenEvents(void *StartEvent,
|
||||
void *EndEvent) override;
|
||||
|
||||
Expected<void *> rawCreateKernel(void *Program,
|
||||
const std::string &Name) override;
|
||||
HandleDestructor getKernelHandleDestructor() override;
|
||||
|
||||
Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
|
||||
KernelLaunchDimensions LaunchDimensions,
|
||||
Span<void *> Arguments,
|
||||
Span<size_t> ArgumentSizes,
|
||||
size_t SharedMemoryBytes) override;
|
||||
|
||||
private:
|
||||
explicit CUDAPlatform(const std::vector<CUcontext> &Contexts)
|
||||
: TheContexts(Contexts) {}
|
||||
|
||||
Status setContext(int DeviceIndex) {
|
||||
if (DeviceIndex < 0 ||
|
||||
static_cast<size_t>(DeviceIndex) >= TheContexts.size())
|
||||
return Status("invalid deivce index " + std::to_string(DeviceIndex));
|
||||
return getCUError(cuCtxSetCurrent(TheContexts[DeviceIndex]),
|
||||
"cuCtxSetCurrent");
|
||||
}
|
||||
|
||||
// Vector of contexts for each device.
|
||||
std::vector<CUcontext> TheContexts;
|
||||
};
|
||||
|
||||
Expected<CUDAPlatform> CUDAPlatform::create() {
|
||||
std::vector<CUcontext> Contexts;
|
||||
if (CUresult Result = cuInit(0))
|
||||
return getCUError(Result, "cuInit");
|
||||
|
||||
int DeviceCount = 0;
|
||||
if (CUresult Result = cuDeviceGetCount(&DeviceCount))
|
||||
return getCUError(Result, "cuDeviceGetCount");
|
||||
|
||||
for (int I = 0; I < DeviceCount; ++I) {
|
||||
CUdevice Device;
|
||||
if (CUresult Result = cuDeviceGet(&Device, I))
|
||||
return getCUError(Result, "cuDeviceGet");
|
||||
CUcontext Context;
|
||||
if (CUresult Result = cuDevicePrimaryCtxRetain(&Context, Device))
|
||||
return getCUError(Result, "cuDevicePrimaryCtxRetain");
|
||||
if (CUresult Result = cuCtxSetCurrent(Context))
|
||||
return getCUError(Result, "cuCtxSetCurrent");
|
||||
Contexts.emplace_back(Context);
|
||||
}
|
||||
|
||||
return CUDAPlatform(Contexts);
|
||||
}
|
||||
|
||||
Expected<int> CUDAPlatform::getDeviceCount() {
|
||||
int Count = 0;
|
||||
if (CUresult Result = cuDeviceGetCount(&Count))
|
||||
return getCUError(Result, "cuDeviceGetCount");
|
||||
return Count;
|
||||
}
|
||||
|
||||
static void cudaDestroyStream(void *H) {
|
||||
logCUWarning(cuStreamDestroy(static_cast<CUstream_st *>(H)),
|
||||
"cuStreamDestroy");
|
||||
}
|
||||
|
||||
Expected<Stream> CUDAPlatform::createStream(int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
unsigned int Flags = CU_STREAM_DEFAULT;
|
||||
CUstream Handle;
|
||||
if (CUresult Result = cuStreamCreate(&Handle, Flags))
|
||||
return getCUError(Result, "cuStreamCreate");
|
||||
return constructStream(this, DeviceIndex, Handle, cudaDestroyStream);
|
||||
}
|
||||
|
||||
Status CUDAPlatform::streamSync(void *Stream) {
|
||||
return getCUError(cuStreamSynchronize(static_cast<CUstream_st *>(Stream)),
|
||||
"cuStreamSynchronize");
|
||||
}
|
||||
|
||||
Status CUDAPlatform::streamWaitOnEvent(void *Stream, void *Event) {
|
||||
// CUDA docs says flags must be 0.
|
||||
unsigned int Flags = 0u;
|
||||
return getCUError(cuStreamWaitEvent(static_cast<CUstream_st *>(Stream),
|
||||
static_cast<CUevent_st *>(Event), Flags),
|
||||
"cuStreamWaitEvent");
|
||||
}
|
||||
|
||||
static void cudaDestroyEvent(void *H) {
|
||||
logCUWarning(cuEventDestroy(static_cast<CUevent_st *>(H)), "cuEventDestroy");
|
||||
}
|
||||
|
||||
Expected<Event> CUDAPlatform::createEvent(int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
unsigned int Flags = CU_EVENT_DEFAULT;
|
||||
CUevent Handle;
|
||||
if (CUresult Result = cuEventCreate(&Handle, Flags))
|
||||
return getCUError(Result, "cuEventCreate");
|
||||
return constructEvent(this, DeviceIndex, Handle, cudaDestroyEvent);
|
||||
}
|
||||
|
||||
Status CUDAPlatform::enqueueEvent(void *Event, void *Stream) {
|
||||
return getCUError(cuEventRecord(static_cast<CUevent_st *>(Event),
|
||||
static_cast<CUstream_st *>(Stream)),
|
||||
"cuEventRecord");
|
||||
}
|
||||
|
||||
bool CUDAPlatform::eventIsDone(void *Event) {
|
||||
return cuEventQuery(static_cast<CUevent_st *>(Event)) != CUDA_ERROR_NOT_READY;
|
||||
}
|
||||
|
||||
Status CUDAPlatform::eventSync(void *Event) {
|
||||
return getCUError(cuEventSynchronize(static_cast<CUevent_st *>(Event)),
|
||||
"cuEventSynchronize");
|
||||
}
|
||||
|
||||
Expected<float> CUDAPlatform::getSecondsBetweenEvents(void *StartEvent,
|
||||
void *EndEvent) {
|
||||
float Milliseconds;
|
||||
if (CUresult Result = cuEventElapsedTime(
|
||||
&Milliseconds, static_cast<CUevent_st *>(StartEvent),
|
||||
static_cast<CUevent_st *>(EndEvent)))
|
||||
return getCUError(Result, "cuEventElapsedTime");
|
||||
return Milliseconds * 1e-6;
|
||||
}
|
||||
|
||||
Expected<void *> CUDAPlatform::rawMallocD(ptrdiff_t ByteCount,
|
||||
int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
if (!ByteCount)
|
||||
return nullptr;
|
||||
CUdeviceptr Pointer;
|
||||
if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
|
||||
return getCUError(Result, "cuMemAlloc");
|
||||
return reinterpret_cast<void *>(Pointer);
|
||||
}
|
||||
|
||||
static void cudaDestroyDeviceMemory(void *H) {
|
||||
logCUWarning(cuMemFree(reinterpret_cast<CUdeviceptr>(H)), "cuMemFree");
|
||||
}
|
||||
|
||||
HandleDestructor CUDAPlatform::getDeviceMemoryHandleDestructor() {
|
||||
return cudaDestroyDeviceMemory;
|
||||
}
|
||||
|
||||
void *CUDAPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t,
|
||||
size_t ByteOffset) {
|
||||
return static_cast<char *>(BaseHandle) + ByteOffset;
|
||||
}
|
||||
|
||||
void CUDAPlatform::rawDestroyDeviceMemorySpanHandle(void *) {
|
||||
// Do nothing for this platform.
|
||||
}
|
||||
|
||||
Expected<void *> CUDAPlatform::rawGetDeviceSymbolAddress(const void *Symbol,
|
||||
int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
void *Address;
|
||||
if (cudaError_t Status = cudaGetSymbolAddress(&Address, Symbol))
|
||||
return getCUDAError(Status, "cudaGetSymbolAddress");
|
||||
return Address;
|
||||
}
|
||||
|
||||
Expected<ptrdiff_t> CUDAPlatform::rawGetDeviceSymbolSize(const void *Symbol,
|
||||
int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
size_t Size;
|
||||
if (cudaError_t Status = cudaGetSymbolSize(&Size, Symbol))
|
||||
return getCUDAError(Status, "cudaGetSymbolSize");
|
||||
return Size;
|
||||
}
|
||||
|
||||
static const void *offsetVoidPtr(const void *Ptr, ptrdiff_t ByteOffset) {
|
||||
return static_cast<const void *>(static_cast<const char *>(Ptr) + ByteOffset);
|
||||
}
|
||||
|
||||
static void *offsetVoidPtr(void *Ptr, ptrdiff_t ByteOffset) {
|
||||
return static_cast<void *>(static_cast<char *>(Ptr) + ByteOffset);
|
||||
}
|
||||
|
||||
Status CUDAPlatform::rawRegisterHostMem(const void *Memory,
|
||||
ptrdiff_t ByteCount) {
|
||||
unsigned int Flags = 0;
|
||||
return getCUError(
|
||||
cuMemHostRegister(const_cast<void *>(Memory), ByteCount, Flags),
|
||||
"cuMemHostRegiser");
|
||||
}
|
||||
|
||||
static void cudaUnregisterHostMemoryHandleDestructor(void *H) {
|
||||
logCUWarning(cuMemHostUnregister(H), "cuMemHostUnregister");
|
||||
}
|
||||
|
||||
HandleDestructor CUDAPlatform::getUnregisterHostMemoryHandleDestructor() {
|
||||
return cudaUnregisterHostMemoryHandleDestructor;
|
||||
}
|
||||
|
||||
Expected<void *> CUDAPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
|
||||
unsigned int Flags = 0;
|
||||
void *Memory;
|
||||
if (CUresult Result = cuMemHostAlloc(&Memory, ByteCount, Flags))
|
||||
return getCUError(Result, "cuMemHostAlloc");
|
||||
return Memory;
|
||||
}
|
||||
|
||||
static void cudaFreeHostMemoryHandleDestructor(void *H) {
|
||||
logCUWarning(cuMemFreeHost(H), "cuMemFreeHost");
|
||||
}
|
||||
|
||||
HandleDestructor CUDAPlatform::getFreeHostMemoryHandleDestructor() {
|
||||
return cudaFreeHostMemoryHandleDestructor;
|
||||
}
|
||||
|
||||
Status CUDAPlatform::asyncCopyDToD(const void *DeviceSrc,
|
||||
ptrdiff_t DeviceSrcByteOffset,
|
||||
void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) {
|
||||
return getCUError(
|
||||
cuMemcpyDtoDAsync(reinterpret_cast<CUdeviceptr>(
|
||||
offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
|
||||
reinterpret_cast<CUdeviceptr>(
|
||||
offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
|
||||
ByteCount, static_cast<CUstream_st *>(Stream)),
|
||||
"cuMemcpyDtoDAsync");
|
||||
}
|
||||
|
||||
Status CUDAPlatform::asyncCopyDToH(const void *DeviceSrc,
|
||||
ptrdiff_t DeviceSrcByteOffset, void *HostDst,
|
||||
ptrdiff_t ByteCount, void *Stream) {
|
||||
return getCUError(
|
||||
cuMemcpyDtoHAsync(HostDst, reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
|
||||
DeviceSrc, DeviceSrcByteOffset)),
|
||||
ByteCount, static_cast<CUstream_st *>(Stream)),
|
||||
"cuMemcpyDtoHAsync");
|
||||
}
|
||||
|
||||
Status CUDAPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) {
|
||||
return getCUError(
|
||||
cuMemcpyHtoDAsync(reinterpret_cast<CUdeviceptr>(
|
||||
offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
|
||||
HostSrc, ByteCount, static_cast<CUstream_st *>(Stream)),
|
||||
"cuMemcpyHtoDAsync");
|
||||
}
|
||||
|
||||
Status CUDAPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
||||
ptrdiff_t ByteCount, char ByteValue,
|
||||
void *Stream) {
|
||||
return getCUError(
|
||||
cuMemsetD8Async(
|
||||
reinterpret_cast<CUdeviceptr>(offsetVoidPtr(DeviceDst, ByteOffset)),
|
||||
ByteValue, ByteCount, static_cast<CUstream_st *>(Stream)),
|
||||
"cuMemsetD8Async");
|
||||
}
|
||||
|
||||
struct StreamCallbackUserData {
|
||||
StreamCallbackUserData(Stream &Stream, StreamCallback Function)
|
||||
: TheStream(Stream), TheFunction(std::move(Function)) {}
|
||||
|
||||
Stream &TheStream;
|
||||
StreamCallback TheFunction;
|
||||
};
|
||||
|
||||
static void CUDA_CB cuStreamCallbackShim(CUstream HStream, CUresult Status,
|
||||
void *UserData) {
|
||||
std::unique_ptr<StreamCallbackUserData> Data(
|
||||
static_cast<StreamCallbackUserData *>(UserData));
|
||||
Stream &TheStream = Data->TheStream;
|
||||
assert(static_cast<CUstream_st *>(TheStream) == HStream);
|
||||
Data->TheFunction(TheStream,
|
||||
getCUError(Status, "stream callback error state"));
|
||||
}
|
||||
|
||||
Status CUDAPlatform::addStreamCallback(Stream &Stream,
|
||||
StreamCallback Callback) {
|
||||
// CUDA docs say flags must always be 0 here.
|
||||
unsigned int Flags = 0u;
|
||||
std::unique_ptr<StreamCallbackUserData> UserData(
|
||||
new StreamCallbackUserData(Stream, std::move(Callback)));
|
||||
return getCUError(cuStreamAddCallback(Stream, cuStreamCallbackShim,
|
||||
UserData.release(), Flags),
|
||||
"cuStreamAddCallback");
|
||||
}
|
||||
|
||||
static void cudaDestroyProgram(void *H) {
|
||||
logCUWarning(cuModuleUnload(static_cast<CUmod_st *>(H)), "cuModuleUnload");
|
||||
}
|
||||
|
||||
Expected<Program> CUDAPlatform::createProgramFromSource(Span<const char> Source,
|
||||
int DeviceIndex) {
|
||||
Status S = setContext(DeviceIndex);
|
||||
if (S.isError())
|
||||
return S;
|
||||
CUmodule Module;
|
||||
constexpr int LogBufferSizeBytes = 1024;
|
||||
char InfoLogBuffer[LogBufferSizeBytes];
|
||||
char ErrorLogBuffer[LogBufferSizeBytes];
|
||||
constexpr size_t OptionsCount = 4;
|
||||
std::array<CUjit_option, OptionsCount> OptionNames = {
|
||||
{CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
|
||||
CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}};
|
||||
std::array<void *, OptionsCount> OptionValues = {
|
||||
{InfoLogBuffer, const_cast<int *>(&LogBufferSizeBytes), ErrorLogBuffer,
|
||||
const_cast<int *>(&LogBufferSizeBytes)}};
|
||||
if (CUresult Result =
|
||||
cuModuleLoadDataEx(&Module, Source.data(), OptionsCount,
|
||||
OptionNames.data(), OptionValues.data())) {
|
||||
InfoLogBuffer[LogBufferSizeBytes - 1] = '\0';
|
||||
ErrorLogBuffer[LogBufferSizeBytes - 1] = '\0';
|
||||
std::ostringstream OutStream;
|
||||
OutStream << "Error creating program from source: "
|
||||
<< getCUErrorMessage(Result)
|
||||
<< "\nINFO MESSAGES\n================\n"
|
||||
<< InfoLogBuffer << "\nERROR MESSAGES\n==================\n"
|
||||
<< ErrorLogBuffer;
|
||||
return Status(OutStream.str());
|
||||
}
|
||||
return constructProgram(this, Module, cudaDestroyProgram);
|
||||
}
|
||||
|
||||
Expected<void *> CUDAPlatform::rawCreateKernel(void *Program,
|
||||
const std::string &Name) {
|
||||
CUmodule Module = static_cast<CUmodule>(Program);
|
||||
CUfunction Kernel;
|
||||
if (CUresult Result = cuModuleGetFunction(&Kernel, Module, Name.c_str()))
|
||||
return getCUError(Result, "cuModuleGetFunction");
|
||||
return Kernel;
|
||||
}
|
||||
|
||||
static void cudaDestroyKernel(void *) {
|
||||
// Do nothing.
|
||||
}
|
||||
|
||||
HandleDestructor CUDAPlatform::getKernelHandleDestructor() {
|
||||
return cudaDestroyKernel;
|
||||
}
|
||||
|
||||
Status CUDAPlatform::rawEnqueueKernelLaunch(
|
||||
void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
|
||||
Span<void *> Arguments, Span<size_t>, size_t SharedMemoryBytes) {
|
||||
return getCUError(
|
||||
cuLaunchKernel(static_cast<CUfunction>(Kernel), LaunchDimensions.GridX,
|
||||
LaunchDimensions.GridY, LaunchDimensions.GridZ,
|
||||
LaunchDimensions.BlockX, LaunchDimensions.BlockY,
|
||||
LaunchDimensions.BlockZ, SharedMemoryBytes,
|
||||
static_cast<CUstream>(Stream), Arguments.data(), nullptr),
|
||||
"cuLaunchKernel");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace cuda {
|
||||
|
||||
/// Gets the CUDAPlatform instance and returns it as an unowned pointer to a
|
||||
/// Platform.
|
||||
Expected<Platform *> getPlatform() {
|
||||
static auto MaybePlatform = []() -> Expected<CUDAPlatform *> {
|
||||
Expected<CUDAPlatform> CreationResult = CUDAPlatform::create();
|
||||
if (CreationResult.isError())
|
||||
return CreationResult.getError();
|
||||
else
|
||||
return new CUDAPlatform(CreationResult.takeValue());
|
||||
}();
|
||||
return MaybePlatform;
|
||||
}
|
||||
|
||||
} // namespace cuda
|
||||
|
||||
} // namespace acxxel
|
@ -1,20 +0,0 @@
|
||||
body {
|
||||
background-color: #e0e0eb;
|
||||
}
|
||||
|
||||
div.header {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
max-width: 60em;
|
||||
padding-left: 2em;
|
||||
padding-right: 2em;
|
||||
}
|
||||
|
||||
div.contents {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
max-width: 60em;
|
||||
background-color: white;
|
||||
padding: 2em;
|
||||
border-radius: 1em;
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
set(CUDA_HOST_COMPILER gcc)
|
||||
set(CUDA_NVCC_FLAGS -std=c++11)
|
||||
|
||||
if(ACXXEL_ENABLE_CUDA)
|
||||
cuda_add_executable(simple_example simple_example.cu)
|
||||
target_link_libraries(simple_example acxxel)
|
||||
endif()
|
||||
|
||||
if(ACXXEL_ENABLE_OPENCL)
|
||||
add_executable(opencl_example opencl_example.cpp)
|
||||
target_link_libraries(opencl_example acxxel ${OpenCL_LIBRARIES})
|
||||
endif()
|
@ -1,69 +0,0 @@
|
||||
//===--- opencl_example.cpp - Example of using Acxxel with OpenCL ---------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file is an example of using OpenCL with Acxxel.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
static const char *SaxpyKernelSource = R"(
|
||||
__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
|
||||
int I = get_global_id(0);
|
||||
if (I < N)
|
||||
X[I] = A * X[I] + Y[I];
|
||||
}
|
||||
)";
|
||||
|
||||
template <size_t N>
|
||||
void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
|
||||
acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
|
||||
acxxel::Stream Stream = OpenCL->createStream().takeValue();
|
||||
auto DeviceX = OpenCL->mallocD<float>(N).takeValue();
|
||||
auto DeviceY = OpenCL->mallocD<float>(N).takeValue();
|
||||
Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
|
||||
acxxel::Program Program =
|
||||
OpenCL
|
||||
->createProgramFromSource(acxxel::Span<const char>(
|
||||
SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
|
||||
.takeValue();
|
||||
acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
|
||||
float *RawX = static_cast<float *>(DeviceX);
|
||||
float *RawY = static_cast<float *>(DeviceY);
|
||||
int IntLength = N;
|
||||
void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
|
||||
size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
|
||||
sizeof(int)};
|
||||
acxxel::Status Status =
|
||||
Stream.asyncKernelLaunch(Kernel, N, Arguments, ArgumentSizes)
|
||||
.syncCopyDToH(DeviceX, X)
|
||||
.sync();
|
||||
if (Status.isError()) {
|
||||
std::fprintf(stderr, "Error during saxpy: %s\n",
|
||||
Status.getMessage().c_str());
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
float A = 2.f;
|
||||
std::array<float, 3> X{{0.f, 1.f, 2.f}};
|
||||
std::array<float, 3> Y{{3.f, 4.f, 5.f}};
|
||||
std::array<float, 3> Expected{{3.f, 6.f, 9.f}};
|
||||
saxpy(A, X, Y);
|
||||
for (int I = 0; I < 3; ++I)
|
||||
if (X[I] != Expected[I]) {
|
||||
std::fprintf(stderr, "Mismatch at position %d, %f != %f\n", I, X[I],
|
||||
Expected[I]);
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
@ -1,109 +0,0 @@
|
||||
//===--- simple_example.cu - Simple example of using Acxxel ---------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file is a simple example of using Acxxel.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// [Example simple saxpy]
|
||||
#include "acxxel.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
// A standard CUDA kernel.
|
||||
__global__ void saxpyKernel(float A, float *X, float *Y, int N) {
|
||||
int I = (blockDim.x * blockIdx.x) + threadIdx.x;
|
||||
if (I < N)
|
||||
X[I] = A * X[I] + Y[I];
|
||||
}
|
||||
|
||||
// A host library wrapping the CUDA kernel. All Acxxel calls are in here.
|
||||
template <size_t N>
|
||||
void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
|
||||
// Get the CUDA platform and make a CUDA stream.
|
||||
acxxel::Platform *CUDA = acxxel::getCUDAPlatform().getValue();
|
||||
acxxel::Stream Stream = CUDA->createStream().takeValue();
|
||||
|
||||
// Allocate space for device arrays.
|
||||
auto DeviceX = CUDA->mallocD<float>(N).takeValue();
|
||||
auto DeviceY = CUDA->mallocD<float>(N).takeValue();
|
||||
|
||||
// Copy X and Y out to the device.
|
||||
Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
|
||||
|
||||
// Launch the kernel using triple-chevron notation.
|
||||
saxpyKernel<<<1, N, 0, Stream>>>(A, DeviceX, DeviceY, N);
|
||||
|
||||
// Copy the results back to the host.
|
||||
acxxel::Status Status = Stream.syncCopyDToH(DeviceX, X).takeStatus();
|
||||
|
||||
// Check for any errors.
|
||||
if (Status.isError()) {
|
||||
std::fprintf(stderr, "Error performing acxxel saxpy: %s\n",
|
||||
Status.getMessage().c_str());
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
/// [Example simple saxpy]
|
||||
|
||||
/// [Example CUDA simple saxpy]
|
||||
template <size_t N>
|
||||
void cudaSaxpy(float A, std::array<float, N> &X, std::array<float, N> &Y) {
|
||||
// This size is needed all over the place, so give it a name.
|
||||
constexpr size_t Size = N * sizeof(float);
|
||||
|
||||
// Allocate space for device arrays.
|
||||
float *DeviceX;
|
||||
float *DeviceY;
|
||||
cudaMalloc(&DeviceX, Size);
|
||||
cudaMalloc(&DeviceY, Size);
|
||||
|
||||
// Copy X and Y out to the device.
|
||||
cudaMemcpy(DeviceX, X.data(), Size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(DeviceY, Y.data(), Size, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch the kernel using triple-chevron notation.
|
||||
saxpyKernel<<<1, N>>>(A, DeviceX, DeviceY, N);
|
||||
|
||||
// Copy the results back to the host.
|
||||
cudaMemcpy(X.data(), DeviceX, Size, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Free resources.
|
||||
cudaFree(DeviceX);
|
||||
cudaFree(DeviceY);
|
||||
|
||||
// Check for any errors.
|
||||
cudaError_t Error = cudaGetLastError();
|
||||
if (Error) {
|
||||
std::fprintf(stderr, "Error performing cudart saxpy: %s\n",
|
||||
cudaGetErrorString(Error));
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
/// [Example CUDA simple saxpy]
|
||||
|
||||
template <typename F> void testSaxpy(F &&SaxpyFunction) {
|
||||
float A = 2.f;
|
||||
std::array<float, 3> X = {{0.f, 1.f, 2.f}};
|
||||
std::array<float, 3> Y = {{3.f, 4.f, 5.f}};
|
||||
std::array<float, 3> Expected = {{3.f, 6.f, 9.f}};
|
||||
SaxpyFunction(A, X, Y);
|
||||
for (int I = 0; I < 3; ++I)
|
||||
if (X[I] != Expected[I]) {
|
||||
std::fprintf(stderr, "Result mismatch at index %d, %f != %f\n", I, X[I],
|
||||
Expected[I]);
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
testSaxpy(saxpy<3>);
|
||||
testSaxpy(cudaSaxpy<3>);
|
||||
}
|
@ -1,550 +0,0 @@
|
||||
//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file defines the standard OpenCL implementation of the Acxxel API.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace acxxel {
|
||||
|
||||
namespace {
|
||||
|
||||
/// An ID containing the platform ID and the device ID within the platform.
|
||||
struct FullDeviceID {
|
||||
cl_platform_id PlatformID;
|
||||
cl_device_id DeviceID;
|
||||
|
||||
FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
|
||||
: PlatformID(PlatformID), DeviceID(DeviceID) {}
|
||||
};
|
||||
|
||||
static std::string getOpenCLErrorMessage(cl_int Result) {
|
||||
if (!Result)
|
||||
return "success";
|
||||
std::ostringstream OutStream;
|
||||
OutStream << "OpenCL error: code = " << Result;
|
||||
return OutStream.str();
|
||||
}
|
||||
|
||||
static Status getOpenCLError(cl_int Result, const std::string &Message) {
|
||||
if (!Result)
|
||||
return Status();
|
||||
std::ostringstream OutStream;
|
||||
OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
|
||||
return Status(OutStream.str());
|
||||
}
|
||||
|
||||
static void logOpenCLWarning(cl_int Result, const std::string &Message) {
|
||||
if (Result) {
|
||||
std::ostringstream OutStream;
|
||||
OutStream << Message << ": " << getOpenCLErrorMessage(Result);
|
||||
logWarning(OutStream.str());
|
||||
}
|
||||
}
|
||||
|
||||
class OpenCLPlatform : public Platform {
|
||||
public:
|
||||
~OpenCLPlatform() override = default;
|
||||
|
||||
static Expected<OpenCLPlatform> create();
|
||||
|
||||
Expected<int> getDeviceCount() override;
|
||||
|
||||
Expected<Stream> createStream(int DeviceIndex) override;
|
||||
|
||||
Expected<Event> createEvent(int DeviceIndex) override;
|
||||
|
||||
Expected<Program> createProgramFromSource(Span<const char> Source,
|
||||
int DeviceIndex) override;
|
||||
|
||||
protected:
|
||||
Status streamSync(void *Stream) override;
|
||||
|
||||
Status streamWaitOnEvent(void *Stream, void *Event) override;
|
||||
|
||||
Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
|
||||
HandleDestructor getDeviceMemoryHandleDestructor() override;
|
||||
void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
|
||||
size_t ByteOffset) override;
|
||||
void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
|
||||
|
||||
Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
|
||||
int DeviceIndex) override;
|
||||
Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
|
||||
int DeviceIndex) override;
|
||||
|
||||
Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
|
||||
HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
|
||||
|
||||
Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
|
||||
HandleDestructor getFreeHostMemoryHandleDestructor() override;
|
||||
|
||||
Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
||||
void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) override;
|
||||
Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
||||
void *HostDst, ptrdiff_t ByteCount,
|
||||
void *Stream) override;
|
||||
Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
|
||||
void *Stream) override;
|
||||
|
||||
Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
||||
ptrdiff_t ByteCount, char ByteValue,
|
||||
void *Stream) override;
|
||||
|
||||
Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
|
||||
|
||||
Status enqueueEvent(void *Event, void *Stream) override;
|
||||
bool eventIsDone(void *Event) override;
|
||||
Status eventSync(void *Event) override;
|
||||
Expected<float> getSecondsBetweenEvents(void *StartEvent,
|
||||
void *EndEvent) override;
|
||||
|
||||
Expected<void *> rawCreateKernel(void *Program,
|
||||
const std::string &Name) override;
|
||||
HandleDestructor getKernelHandleDestructor() override;
|
||||
|
||||
Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
|
||||
KernelLaunchDimensions LaunchDimensions,
|
||||
Span<void *> Arguments,
|
||||
Span<size_t> ArgumentSizes,
|
||||
size_t SharedMemoryBytes) override;
|
||||
|
||||
private:
|
||||
OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
|
||||
std::vector<cl_context> &&Contexts,
|
||||
std::vector<cl_command_queue> &&CommandQueues)
|
||||
: FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
|
||||
CommandQueues(std::move(CommandQueues)) {}
|
||||
|
||||
std::vector<FullDeviceID> FullDeviceIDs;
|
||||
std::vector<cl_context> Contexts;
|
||||
std::vector<cl_command_queue> CommandQueues;
|
||||
};
|
||||
|
||||
Expected<OpenCLPlatform> OpenCLPlatform::create() {
|
||||
constexpr cl_uint MaxNumEntries = 100;
|
||||
cl_platform_id Platforms[MaxNumEntries];
|
||||
cl_uint NumPlatforms;
|
||||
if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
|
||||
return getOpenCLError(Result, "clGetPlatformIDs");
|
||||
|
||||
std::vector<FullDeviceID> FullDeviceIDs;
|
||||
for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
|
||||
++PlatformIndex) {
|
||||
cl_uint NumDevices;
|
||||
cl_device_id Devices[MaxNumEntries];
|
||||
if (cl_int Result =
|
||||
clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
|
||||
MaxNumEntries, Devices, &NumDevices))
|
||||
return getOpenCLError(Result, "clGetDeviceIDs");
|
||||
for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
|
||||
FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
|
||||
Devices[DeviceIndex]);
|
||||
}
|
||||
|
||||
if (FullDeviceIDs.empty())
|
||||
return Status("No OpenCL device available on this system.");
|
||||
|
||||
std::vector<cl_context> Contexts(FullDeviceIDs.size());
|
||||
std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
|
||||
for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
|
||||
cl_int CreateContextResult;
|
||||
Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
|
||||
nullptr, nullptr, &CreateContextResult);
|
||||
if (CreateContextResult)
|
||||
return getOpenCLError(CreateContextResult, "clCreateContext");
|
||||
|
||||
cl_int CreateCommandQueueResult;
|
||||
CommandQueues[I] = clCreateCommandQueue(
|
||||
Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
|
||||
&CreateCommandQueueResult);
|
||||
if (CreateCommandQueueResult)
|
||||
return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
|
||||
}
|
||||
|
||||
return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
|
||||
std::move(CommandQueues));
|
||||
}
|
||||
|
||||
Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
|
||||
|
||||
static void openCLDestroyStream(void *H) {
|
||||
logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
|
||||
"clReleaseCommandQueue");
|
||||
}
|
||||
|
||||
Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
|
||||
cl_int Result;
|
||||
cl_command_queue Queue = clCreateCommandQueue(
|
||||
Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
|
||||
CL_QUEUE_PROFILING_ENABLE, &Result);
|
||||
if (Result)
|
||||
return getOpenCLError(Result, "clCreateCommandQueue");
|
||||
return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
|
||||
}
|
||||
|
||||
static void openCLEventDestroy(void *H) {
|
||||
cl_event *CLEvent = static_cast<cl_event *>(H);
|
||||
logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
|
||||
delete CLEvent;
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::streamSync(void *Stream) {
|
||||
return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
|
||||
"clFinish");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
|
||||
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
||||
return getOpenCLError(
|
||||
clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
|
||||
CLEvent, nullptr),
|
||||
"clEnqueueMarkerWithWaitList");
|
||||
}
|
||||
|
||||
Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
|
||||
cl_int Result;
|
||||
cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
|
||||
if (Result)
|
||||
return getOpenCLError(Result, "clCreateUserEvent");
|
||||
if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
|
||||
return getOpenCLError(Result, "clSetUserEventStatus");
|
||||
return constructEvent(this, DeviceIndex, new cl_event(Event),
|
||||
openCLEventDestroy);
|
||||
}
|
||||
|
||||
static void openCLDestroyProgram(void *H) {
|
||||
logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
|
||||
"clReleaseProgram");
|
||||
}
|
||||
|
||||
Expected<Program>
|
||||
OpenCLPlatform::createProgramFromSource(Span<const char> Source,
|
||||
int DeviceIndex) {
|
||||
cl_int Error;
|
||||
const char *CSource = Source.data();
|
||||
size_t SourceSize = Source.size();
|
||||
cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
|
||||
&CSource, &SourceSize, &Error);
|
||||
if (Error)
|
||||
return getOpenCLError(Error, "clCreateProgramWithSource");
|
||||
cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
|
||||
if (cl_int Error =
|
||||
clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
|
||||
return getOpenCLError(Error, "clBuildProgram");
|
||||
return constructProgram(this, Program, openCLDestroyProgram);
|
||||
}
|
||||
|
||||
Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
|
||||
int DeviceIndex) {
|
||||
cl_int Result;
|
||||
cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
|
||||
ByteCount, nullptr, &Result);
|
||||
if (Result)
|
||||
return getOpenCLError(Result, "clCreateBuffer");
|
||||
return reinterpret_cast<void *>(Memory);
|
||||
}
|
||||
|
||||
static void openCLDestroyDeviceMemory(void *H) {
|
||||
logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
|
||||
"clReleaseMemObject");
|
||||
}
|
||||
|
||||
HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
|
||||
return openCLDestroyDeviceMemory;
|
||||
}
|
||||
|
||||
void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
|
||||
size_t ByteSize,
|
||||
size_t ByteOffset) {
|
||||
cl_int Error;
|
||||
cl_buffer_region Region;
|
||||
Region.origin = ByteOffset;
|
||||
Region.size = ByteSize;
|
||||
cl_mem SubBuffer =
|
||||
clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
|
||||
logOpenCLWarning(Error, "clCreateSubBuffer");
|
||||
if (Error)
|
||||
return nullptr;
|
||||
return SubBuffer;
|
||||
}
|
||||
|
||||
void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
|
||||
openCLDestroyDeviceMemory(Handle);
|
||||
}
|
||||
|
||||
Expected<void *>
|
||||
OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
|
||||
int /*DeviceIndex*/) {
|
||||
// This doesn't seem to have any equivalent in OpenCL.
|
||||
return Status("not implemented");
|
||||
}
|
||||
|
||||
Expected<ptrdiff_t>
|
||||
OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
|
||||
int /*DeviceIndex*/) {
|
||||
// This doesn't seem to have any equivalent in OpenCL.
|
||||
return Status("not implemented");
|
||||
}
|
||||
|
||||
static void noOpHandleDestructor(void *) {}
|
||||
|
||||
Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
|
||||
ptrdiff_t /*ByteCount*/) {
|
||||
// TODO(jhen): Do we want to do something to pin the memory here?
|
||||
return Status();
|
||||
}
|
||||
|
||||
HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
|
||||
// TODO(jhen): Do we want to unpin the memory here?
|
||||
return noOpHandleDestructor;
|
||||
}
|
||||
|
||||
Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
|
||||
// TODO(jhen): Do we want to do something to pin the memory here?
|
||||
return std::malloc(ByteCount);
|
||||
}
|
||||
|
||||
static void freeMemoryHandleDestructor(void *Memory) {
|
||||
// TODO(jhen): Do we want to unpin the memory here?
|
||||
std::free(Memory);
|
||||
}
|
||||
|
||||
HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
|
||||
return freeMemoryHandleDestructor;
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
|
||||
ptrdiff_t DeviceSrcByteOffset,
|
||||
void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) {
|
||||
return getOpenCLError(
|
||||
clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
|
||||
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
|
||||
static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
|
||||
DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
|
||||
"clEnqueueCopyBuffer");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
|
||||
ptrdiff_t DeviceSrcByteOffset,
|
||||
void *HostDst, ptrdiff_t ByteCount,
|
||||
void *Stream) {
|
||||
return getOpenCLError(
|
||||
clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
|
||||
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
|
||||
CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
|
||||
nullptr, nullptr),
|
||||
"clEnqueueReadBuffer");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
||||
ptrdiff_t DeviceDstByteOffset,
|
||||
ptrdiff_t ByteCount, void *Stream) {
|
||||
return getOpenCLError(
|
||||
clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
|
||||
static_cast<cl_mem>(DeviceDst), CL_TRUE,
|
||||
DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
|
||||
nullptr),
|
||||
"clEnqueueWriteBuffer");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
||||
ptrdiff_t ByteCount, char ByteValue,
|
||||
void *Stream) {
|
||||
return getOpenCLError(
|
||||
clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
|
||||
static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
|
||||
ByteOffset, ByteCount, 0, nullptr, nullptr),
|
||||
"clEnqueueFillBuffer");
|
||||
}
|
||||
|
||||
struct StreamCallbackUserData {
|
||||
StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
|
||||
cl_event EndEvent)
|
||||
: TheStream(TheStream), TheFunction(std::move(Function)),
|
||||
EndEvent(EndEvent) {}
|
||||
|
||||
Stream &TheStream;
|
||||
StreamCallback TheFunction;
|
||||
cl_event EndEvent;
|
||||
};
|
||||
|
||||
// A function with the right signature to pass to clSetEventCallback.
|
||||
void CL_CALLBACK openCLStreamCallbackShim(cl_event,
|
||||
cl_int EventCommandExecStatus,
|
||||
void *UserData) {
|
||||
std::unique_ptr<StreamCallbackUserData> Data(
|
||||
static_cast<StreamCallbackUserData *>(UserData));
|
||||
Data->TheFunction(
|
||||
Data->TheStream,
|
||||
getOpenCLError(EventCommandExecStatus, "stream callback error state"));
|
||||
if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
|
||||
logOpenCLWarning(Result, "clSetUserEventStatus");
|
||||
if (cl_int Result = clReleaseEvent(Data->EndEvent))
|
||||
logOpenCLWarning(Result, "clReleaseEvent");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
|
||||
StreamCallback Callback) {
|
||||
cl_int Result;
|
||||
cl_event StartEvent =
|
||||
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
|
||||
if (Result)
|
||||
return getOpenCLError(Result, "clCreateUserEvent");
|
||||
cl_event EndEvent =
|
||||
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
|
||||
if (Result)
|
||||
return getOpenCLError(Result, "clCreateUserEvent");
|
||||
cl_event StartBarrierEvent;
|
||||
if (cl_int Result = clEnqueueBarrierWithWaitList(
|
||||
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
|
||||
&StartEvent, &StartBarrierEvent))
|
||||
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
|
||||
|
||||
if (cl_int Result = clEnqueueBarrierWithWaitList(
|
||||
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
|
||||
&EndEvent, nullptr))
|
||||
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
|
||||
|
||||
std::unique_ptr<StreamCallbackUserData> UserData(
|
||||
new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
|
||||
if (cl_int Result =
|
||||
clSetEventCallback(StartBarrierEvent, CL_RUNNING,
|
||||
openCLStreamCallbackShim, UserData.release()))
|
||||
return getOpenCLError(Result, "clSetEventCallback");
|
||||
|
||||
if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
|
||||
return getOpenCLError(Result, "clSetUserEventStatus");
|
||||
|
||||
if (cl_int Result = clReleaseEvent(StartBarrierEvent))
|
||||
return getOpenCLError(Result, "clReleaseEvent");
|
||||
|
||||
return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
|
||||
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
||||
cl_event OldEvent = *CLEvent;
|
||||
cl_event NewEvent;
|
||||
if (cl_int Result = clEnqueueMarkerWithWaitList(
|
||||
static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
|
||||
return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
|
||||
*CLEvent = NewEvent;
|
||||
return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
|
||||
}
|
||||
|
||||
bool OpenCLPlatform::eventIsDone(void *Event) {
|
||||
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
||||
cl_int EventStatus;
|
||||
logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(EventStatus), &EventStatus, nullptr),
|
||||
"clGetEventInfo");
|
||||
return EventStatus == CL_COMPLETE || EventStatus < 0;
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::eventSync(void *Event) {
|
||||
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
||||
return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
|
||||
}
|
||||
|
||||
Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
|
||||
void *EndEvent) {
|
||||
cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
|
||||
cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
|
||||
|
||||
cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
|
||||
cl_ulong StartNanoseconds;
|
||||
cl_ulong EndNanoseconds;
|
||||
if (cl_int Result =
|
||||
clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
|
||||
&StartNanoseconds, nullptr))
|
||||
return getOpenCLError(Result, "clGetEventProfilingInfo");
|
||||
if (cl_int Result = clGetEventProfilingInfo(
|
||||
*CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
|
||||
return getOpenCLError(Result, "clGetEventProfilingInfo");
|
||||
return (EndNanoseconds - StartNanoseconds) * 1e-12;
|
||||
}
|
||||
|
||||
Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
|
||||
const std::string &Name) {
|
||||
|
||||
cl_int Error;
|
||||
cl_kernel Kernel =
|
||||
clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
|
||||
if (Error)
|
||||
return getOpenCLError(Error, "clCreateKernel");
|
||||
return Kernel;
|
||||
}
|
||||
|
||||
static void openCLDestroyKernel(void *H) {
|
||||
logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
|
||||
"clReleaseKernel");
|
||||
}
|
||||
|
||||
HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
|
||||
return openCLDestroyKernel;
|
||||
}
|
||||
|
||||
Status OpenCLPlatform::rawEnqueueKernelLaunch(
|
||||
void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
|
||||
Span<void *> Arguments, Span<size_t> ArgumentSizes,
|
||||
size_t SharedMemoryBytes) {
|
||||
if (SharedMemoryBytes != 0)
|
||||
return Status("OpenCL kernel launches only accept zero for the shared "
|
||||
"memory byte size");
|
||||
cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
|
||||
for (int I = 0; I < Arguments.size(); ++I)
|
||||
if (cl_int Error =
|
||||
clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
|
||||
return getOpenCLError(Error, "clSetKernelArg");
|
||||
size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
|
||||
LaunchDimensions.BlockZ};
|
||||
size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
|
||||
LaunchDimensions.BlockY * LaunchDimensions.GridY,
|
||||
LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
|
||||
return getOpenCLError(
|
||||
clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
|
||||
3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
|
||||
nullptr, nullptr),
|
||||
"clEnqueueNDRangeKernel");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace opencl {
|
||||
|
||||
/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
|
||||
/// Platform.
|
||||
Expected<Platform *> getPlatform() {
|
||||
static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
|
||||
Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
|
||||
if (CreationResult.isError())
|
||||
return CreationResult.getError();
|
||||
else
|
||||
return new OpenCLPlatform(CreationResult.takeValue());
|
||||
}();
|
||||
return MaybePlatform;
|
||||
}
|
||||
|
||||
} // namespace opencl
|
||||
|
||||
} // namespace acxxel
|
@ -1,221 +0,0 @@
|
||||
//===--- span- The span class -----------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ACXXEL_SPAN_H
|
||||
#define ACXXEL_SPAN_H
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <exception>
|
||||
#include <iterator>
|
||||
#include <type_traits>
|
||||
|
||||
namespace acxxel {
|
||||
|
||||
/// Value used to indicate slicing to the end of the span.
|
||||
static constexpr std::ptrdiff_t dynamic_extent = -1; // NOLINT
|
||||
|
||||
class SpanBase {};
|
||||
|
||||
/// Implementation of the proposed C++17 std::span class.
|
||||
///
|
||||
/// Based on the paper:
|
||||
/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0122r1.pdf
|
||||
template <typename ElementType> class Span : public SpanBase {
|
||||
public:
|
||||
/// \name constants and types
|
||||
/// \{
|
||||
|
||||
using element_type = ElementType;
|
||||
using index_type = std::ptrdiff_t;
|
||||
using pointer = element_type *;
|
||||
using reference = element_type &;
|
||||
using iterator = element_type *;
|
||||
using const_iterator = const element_type *;
|
||||
using value_type = typename std::remove_const<element_type>::type;
|
||||
|
||||
/// \}
|
||||
|
||||
/// \name constructors, copy, assignment, and destructor.
|
||||
/// \{
|
||||
|
||||
/// Constructs an empty span with null pointer data.
|
||||
Span() : Data(nullptr), Size(0) {}
|
||||
|
||||
/// Constructs an empty span with null pointer data.
|
||||
// Intentionally implicit.
|
||||
Span(std::nullptr_t) : Data(nullptr), Size(0) {}
|
||||
|
||||
/// Constructs a span from a pointer and element count.
|
||||
Span(pointer Ptr, index_type Count) : Data(Ptr), Size(Count) {
|
||||
if (Count < 0 || (!Ptr && Count))
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
/// Constructs a span from a pointer to the fist element in the range and a
|
||||
/// pointer to one past the last element in the range.
|
||||
Span(pointer FirstElem, pointer LastElem)
|
||||
: Data(FirstElem), Size(std::distance(FirstElem, LastElem)) {
|
||||
if (Size < 0)
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
/// Constructs a span from an array.
|
||||
// Intentionally implicit.
|
||||
template <typename T, size_t N> Span(T (&Arr)[N]) : Data(Arr), Size(N) {}
|
||||
|
||||
/// Constructs a span from a std::array.
|
||||
// Intentionally implicit.
|
||||
template <size_t N>
|
||||
Span(const std::array<typename std::remove_const<element_type>::type, N> &Arr)
|
||||
: Data(Arr.data()), Size(N) {}
|
||||
|
||||
/// Constructs a span from a container such as a std::vector.
|
||||
// TODO(jhen): Put in a check to make sure this constructor does not
|
||||
// participate in overload resolution unless Container meets the following
|
||||
// requirements:
|
||||
// * Container is a contiguous container and a sequence container.
|
||||
// Intentionally implicit.
|
||||
template <typename Container>
|
||||
Span(Container &Cont,
|
||||
typename std::enable_if<
|
||||
std::is_same<
|
||||
typename std::remove_const<typename Container::value_type>::type,
|
||||
typename std::remove_const<element_type>::type>::value &&
|
||||
!std::is_array<Container>::value &&
|
||||
!std::is_base_of<SpanBase, Container>::value &&
|
||||
std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
|
||||
nullptr)
|
||||
: Data(Cont.data()), Size(Cont.size()) {}
|
||||
|
||||
/// Avoids creating spans from expiring temporary objects.
|
||||
// TODO(jhen): Put in a check to make sure this constructor does not
|
||||
// participate in overload resolution unless Container meets the following
|
||||
// requirements:
|
||||
// * Container is a contiguous container and a sequence container.
|
||||
template <typename Container>
|
||||
Span(Container &&Cont,
|
||||
typename std::enable_if<
|
||||
std::is_same<
|
||||
typename std::remove_const<typename Container::value_type>::type,
|
||||
typename std::remove_const<element_type>::type>::value &&
|
||||
!std::is_array<Container>::value &&
|
||||
!std::is_base_of<SpanBase, Container>::value &&
|
||||
std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
|
||||
nullptr) = delete;
|
||||
|
||||
Span(const Span &) noexcept = default;
|
||||
Span(Span &&) noexcept;
|
||||
|
||||
/// Constructs a span from copying a span of another type that can be
|
||||
/// implicitly converted to the type stored by the constructed span.
|
||||
// Intentionally implicit.
|
||||
template <typename OtherElementType>
|
||||
Span(const Span<OtherElementType> &Other)
|
||||
: Data(Other.Data), Size(Other.Size) {}
|
||||
|
||||
/// Constructs a span from moving a span of another type that can be
|
||||
/// implicitly converted to the type stored by the constructed span.
|
||||
// Intentionally implicit.
|
||||
template <typename OtherElementType>
|
||||
Span(Span<OtherElementType> &&Other) : Data(Other.Data), Size(Other.Size) {}
|
||||
|
||||
~Span() = default;
|
||||
|
||||
Span &operator=(const Span &) noexcept = default;
|
||||
Span &operator=(Span &&) noexcept;
|
||||
|
||||
/// \}
|
||||
|
||||
/// \name subviews
|
||||
/// \{
|
||||
|
||||
/// Creates a span out of the first Count elements of this span.
|
||||
Span<element_type> first(index_type Count) const {
|
||||
bool Valid = Count >= 0 && Count <= size();
|
||||
if (!Valid)
|
||||
std::terminate();
|
||||
return Span<element_type>(data(), Count);
|
||||
}
|
||||
|
||||
/// Creates a span out of the last Count elements of this span.
|
||||
Span<element_type> last(index_type Count) const {
|
||||
bool Valid = Count >= 0 && Count <= size();
|
||||
if (!Valid)
|
||||
std::terminate();
|
||||
return Span<element_type>(Count == 0 ? data() : data() + (size() - Count),
|
||||
Count);
|
||||
}
|
||||
|
||||
/// Creates a span out of the Count elements of this span beginning at Offset.
|
||||
///
|
||||
/// If no arguments is provided for Count, the new span will extend to the end
|
||||
/// of the current span.
|
||||
Span<element_type> subspan(index_type Offset,
|
||||
index_type Count = dynamic_extent) const {
|
||||
bool Valid =
|
||||
(Offset == 0 || (Offset > 0 && Offset <= size())) &&
|
||||
(Count == dynamic_extent || (Count >= 0 && Offset + Count <= size()));
|
||||
if (!Valid)
|
||||
std::terminate();
|
||||
return Span<element_type>(
|
||||
data() + Offset, Count == dynamic_extent ? size() - Offset : Count);
|
||||
}
|
||||
|
||||
/// \}
|
||||
|
||||
/// \name observers
|
||||
/// \{
|
||||
|
||||
index_type length() const { return Size; }
|
||||
index_type size() const { return Size; }
|
||||
bool empty() const { return size() == 0; }
|
||||
|
||||
/// \}
|
||||
|
||||
/// \name element access
|
||||
/// \{
|
||||
|
||||
reference operator[](index_type Idx) const {
|
||||
bool Valid = Idx >= 0 && Idx < size();
|
||||
if (!Valid)
|
||||
std::terminate();
|
||||
return Data[Idx];
|
||||
}
|
||||
|
||||
reference operator()(index_type Idx) const { return operator[](Idx); }
|
||||
|
||||
pointer data() const noexcept { return Data; }
|
||||
|
||||
/// \}
|
||||
|
||||
/// \name iterator support
|
||||
/// \{
|
||||
|
||||
iterator begin() const noexcept { return Data; }
|
||||
iterator end() const noexcept { return Data + Size; }
|
||||
const_iterator cbegin() const noexcept { return Data; }
|
||||
const_iterator cend() const noexcept { return Data + Size; }
|
||||
|
||||
/// \}
|
||||
|
||||
private:
|
||||
template <typename OtherElementType> friend class Span;
|
||||
|
||||
pointer Data;
|
||||
index_type Size;
|
||||
};
|
||||
|
||||
template <typename ElementType>
|
||||
Span<ElementType>::Span(Span &&) noexcept = default;
|
||||
template <typename ElementType>
|
||||
Span<ElementType> &Span<ElementType>::operator=(Span &&) noexcept = default;
|
||||
|
||||
} // namespace acxxel
|
||||
|
||||
#endif // ACXXEL_SPAN_H
|
@ -1,235 +0,0 @@
|
||||
//===--- status.h - Status and Expected classes -----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ACXXEL_STATUS_H
|
||||
#define ACXXEL_STATUS_H
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
// The clang compiler supports annotating class declarations with the
|
||||
// warn_unused_result attribute, and this has the meaning that whenever that
|
||||
// type is returned from a function, the function is marked as
|
||||
// warn_unused_result.
|
||||
//
|
||||
// The gcc compiler does not support warn_unused_result for classes, only for
|
||||
// functions, so we only use this feature with clang.
|
||||
#ifdef __clang__
|
||||
#define ACXXEL_WARN_UNUSED_RESULT_TYPE __attribute__((warn_unused_result))
|
||||
#else
|
||||
#define ACXXEL_WARN_UNUSED_RESULT_TYPE
|
||||
#endif
|
||||
|
||||
namespace acxxel {
|
||||
|
||||
/// Status type.
|
||||
///
|
||||
/// May represent failure with a string error message, or may indicate success.
|
||||
class ACXXEL_WARN_UNUSED_RESULT_TYPE Status {
|
||||
public:
|
||||
/// Creates a Status representing success.
|
||||
Status() : HasMessage(false) {}
|
||||
|
||||
/// Creates a Status representing failure with the given error message.
|
||||
explicit Status(const std::string &Message)
|
||||
: HasMessage(true), Message(Message) {}
|
||||
|
||||
Status(const Status &) = default;
|
||||
|
||||
Status &operator=(const Status &) = default;
|
||||
|
||||
Status(Status &&) noexcept = default;
|
||||
|
||||
// Cannot use default because the move assignment operator for std::string is
|
||||
// not marked noexcept.
|
||||
Status &operator=(Status &&That) noexcept {
|
||||
HasMessage = That.HasMessage;
|
||||
Message = std::move(That.Message);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~Status() = default;
|
||||
|
||||
/// Returns true if this Status represents failure. Otherwise, returns false.
|
||||
bool isError() const { return HasMessage; }
|
||||
|
||||
/// Returns true if this Status represents success. Otherwise, returns false.
|
||||
operator bool() const { return !HasMessage; }
|
||||
|
||||
/// Gets a reference to the error message for this Status.
|
||||
///
|
||||
/// Should only be called if isError() returns true.
|
||||
const std::string &getMessage() const { return Message; }
|
||||
|
||||
private:
|
||||
bool HasMessage;
|
||||
std::string Message;
|
||||
};
|
||||
|
||||
class ExpectedBase {
|
||||
protected:
|
||||
enum class State {
|
||||
SUCCESS,
|
||||
FAILURE,
|
||||
MOVED,
|
||||
};
|
||||
};
|
||||
|
||||
/// Either a value of type T or a Status representing failure.
|
||||
template <typename T> class Expected : public ExpectedBase {
|
||||
public:
|
||||
/// Creates an Expected representing failure with the given Error status.
|
||||
// Intentionally implicit.
|
||||
Expected(Status AnError)
|
||||
: TheState(State::FAILURE), TheError(std::move(AnError)) {
|
||||
assert(AnError.isError() && "constructing an error Expected value from a "
|
||||
"success status is not allowed");
|
||||
}
|
||||
|
||||
/// Creates an Expected representing success with the given value.
|
||||
// Intentionally implicit.
|
||||
Expected(T Value) : TheState(State::SUCCESS), TheValue(std::move(Value)) {}
|
||||
|
||||
Expected(const Expected &That) : TheState(That.TheState) {
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
new (&TheValue) T(That.TheValue);
|
||||
break;
|
||||
case State::FAILURE:
|
||||
new (&TheError) Status(That.TheError);
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do in this case.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Expected &operator=(Expected That) {
|
||||
TheState = That.TheState;
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
new (&TheValue) T(std::move(That.TheValue));
|
||||
break;
|
||||
case State::FAILURE:
|
||||
new (&TheError) Status(std::move(That.TheError));
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do in this case.
|
||||
break;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Expected(Expected &&That) noexcept : TheState(That.TheState) {
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
new (&TheValue) T(std::move(That.TheValue));
|
||||
break;
|
||||
case State::FAILURE:
|
||||
new (&TheError) Status(std::move(That.TheError));
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do in this case.
|
||||
break;
|
||||
}
|
||||
That.TheState = State::MOVED;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
Expected(const Expected<U> &That) : TheState(That.TheState) {
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
new (&TheValue) T(That.TheValue);
|
||||
break;
|
||||
case State::FAILURE:
|
||||
new (&TheError) Status(That.TheError);
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do in this case.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename U> Expected(Expected<U> &&That) : TheState(That.TheState) {
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
new (&TheValue) T(std::move(That.TheValue));
|
||||
break;
|
||||
case State::FAILURE:
|
||||
new (&TheError) Status(std::move(That.TheError));
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do in this case.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
~Expected() {
|
||||
switch (TheState) {
|
||||
case State::SUCCESS:
|
||||
TheValue.~T();
|
||||
break;
|
||||
case State::FAILURE:
|
||||
TheError.~Status();
|
||||
break;
|
||||
case State::MOVED:
|
||||
// Nothing to do for this case.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this instance represents failure.
|
||||
bool isError() const { return TheState != State::SUCCESS; }
|
||||
|
||||
/// Gets a reference to the Status object.
|
||||
///
|
||||
/// Should only be called if isError() returns true.
|
||||
const Status &getError() const {
|
||||
assert(isError());
|
||||
return TheError;
|
||||
}
|
||||
|
||||
/// Gets a const reference to the value object.
|
||||
///
|
||||
/// Should only be called if isError() returns false.
|
||||
const T &getValue() const {
|
||||
assert(!isError());
|
||||
return TheValue;
|
||||
}
|
||||
|
||||
/// Gets a reference to the value object.
|
||||
///
|
||||
/// Should only be called if isError() returns false.
|
||||
T &getValue() {
|
||||
assert(!isError());
|
||||
return TheValue;
|
||||
}
|
||||
|
||||
/// Takes the value from this object by moving it to the return value.
|
||||
///
|
||||
/// Should only be called if isError() returns false.
|
||||
T takeValue() {
|
||||
assert(!isError());
|
||||
TheState = State::MOVED;
|
||||
return std::move(TheValue);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename U> friend class Expected;
|
||||
|
||||
State TheState;
|
||||
|
||||
union {
|
||||
T TheValue;
|
||||
Status TheError;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace acxxel
|
||||
|
||||
#endif // ACXXEL_STATUS_H
|
@ -1,41 +0,0 @@
|
||||
add_executable(acxxel_test acxxel_test.cpp)
|
||||
target_link_libraries(
|
||||
acxxel_test
|
||||
acxxel
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(AcxxelTest acxxel_test)
|
||||
|
||||
add_executable(span_test span_test.cpp)
|
||||
target_link_libraries(
|
||||
span_test
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(SpanTest span_test)
|
||||
|
||||
add_executable(status_test status_test.cpp)
|
||||
target_link_libraries(
|
||||
status_test
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(StatusTest status_test)
|
||||
|
||||
if(ACXXEL_ENABLE_OPENCL)
|
||||
add_executable(opencl_test opencl_test.cpp)
|
||||
target_link_libraries(
|
||||
opencl_test
|
||||
acxxel
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(OpenCLTest opencl_test)
|
||||
endif()
|
||||
|
||||
if(ACXXEL_ENABLE_MULTI_DEVICE_UNIT_TESTS)
|
||||
add_executable(multi_device_test multi_device_test.cpp)
|
||||
target_link_libraries(
|
||||
multi_device_test
|
||||
acxxel
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
add_test(MultiDeviceTest multi_device_test)
|
||||
endif()
|
@ -1,419 +0,0 @@
|
||||
//===--- acxxel_test.cpp - Tests for the Acxxel API -----------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
#include "config.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T, size_t N> constexpr size_t arraySize(T (&)[N]) {
|
||||
return N;
|
||||
}
|
||||
|
||||
using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
|
||||
class AcxxelTest : public ::testing::TestWithParam<PlatformGetter> {};
|
||||
|
||||
TEST_P(AcxxelTest, GetDeviceCount) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int DeviceCount = Platform->getDeviceCount().getValue();
|
||||
EXPECT_GE(DeviceCount, 0);
|
||||
}
|
||||
|
||||
// Tests all the methods of a DeviceMemorySpan that was created from the asSpan
|
||||
// method of a DeviceMemory object.
|
||||
//
|
||||
// The length is the number of elements in the span. The ElementByteSize is the
|
||||
// number of bytes per element in the span.
|
||||
//
|
||||
// It is assumed that the input span has 10 or more elements.
|
||||
template <typename SpanType>
|
||||
void testFullDeviceMemorySpan(SpanType &&Span, ptrdiff_t Length,
|
||||
ptrdiff_t ElementByteSize) {
|
||||
EXPECT_GE(Length, 10);
|
||||
EXPECT_GT(ElementByteSize, 0);
|
||||
|
||||
// Full span
|
||||
EXPECT_EQ(Length, Span.length());
|
||||
EXPECT_EQ(Length, Span.size());
|
||||
EXPECT_EQ(Length * ElementByteSize, Span.byte_size());
|
||||
EXPECT_EQ(0, Span.offset());
|
||||
EXPECT_EQ(0, Span.byte_offset());
|
||||
EXPECT_FALSE(Span.empty());
|
||||
|
||||
// Sub-span with first method.
|
||||
auto First2 = Span.first(2);
|
||||
EXPECT_EQ(2, First2.length());
|
||||
EXPECT_EQ(2, First2.size());
|
||||
EXPECT_EQ(2 * ElementByteSize, First2.byte_size());
|
||||
EXPECT_EQ(0, First2.offset());
|
||||
EXPECT_EQ(0, First2.byte_offset());
|
||||
EXPECT_FALSE(First2.empty());
|
||||
|
||||
auto First0 = Span.first(0);
|
||||
EXPECT_EQ(0, First0.length());
|
||||
EXPECT_EQ(0, First0.size());
|
||||
EXPECT_EQ(0, First0.byte_size());
|
||||
EXPECT_EQ(0, First0.offset());
|
||||
EXPECT_EQ(0, First0.byte_offset());
|
||||
EXPECT_TRUE(First0.empty());
|
||||
|
||||
// Sub-span with last method.
|
||||
auto Last2 = Span.last(2);
|
||||
EXPECT_EQ(2, Last2.length());
|
||||
EXPECT_EQ(2, Last2.size());
|
||||
EXPECT_EQ(2 * ElementByteSize, Last2.byte_size());
|
||||
EXPECT_EQ(Length - 2, Last2.offset());
|
||||
EXPECT_EQ((Length - 2) * ElementByteSize, Last2.byte_offset());
|
||||
EXPECT_FALSE(Last2.empty());
|
||||
|
||||
auto Last0 = Span.last(0);
|
||||
EXPECT_EQ(0, Last0.length());
|
||||
EXPECT_EQ(0, Last0.size());
|
||||
EXPECT_EQ(0, Last0.byte_size());
|
||||
EXPECT_EQ(Length, Last0.offset());
|
||||
EXPECT_EQ(Length * ElementByteSize, Last0.byte_offset());
|
||||
EXPECT_TRUE(Last0.empty());
|
||||
|
||||
// Sub-span with subspan method.
|
||||
auto Middle2 = Span.subspan(4, 2);
|
||||
EXPECT_EQ(2, Middle2.length());
|
||||
EXPECT_EQ(2, Middle2.size());
|
||||
EXPECT_EQ(2 * ElementByteSize, Middle2.byte_size());
|
||||
EXPECT_EQ(4, Middle2.offset());
|
||||
EXPECT_EQ(4 * ElementByteSize, Middle2.byte_offset());
|
||||
EXPECT_FALSE(Middle2.empty());
|
||||
|
||||
auto Middle0 = Span.subspan(4, 0);
|
||||
EXPECT_EQ(0, Middle0.length());
|
||||
EXPECT_EQ(0, Middle0.size());
|
||||
EXPECT_EQ(0, Middle0.byte_size());
|
||||
EXPECT_EQ(4, Middle0.offset());
|
||||
EXPECT_EQ(4 * ElementByteSize, Middle0.byte_offset());
|
||||
EXPECT_TRUE(Middle0.empty());
|
||||
|
||||
auto Subspan2AtStart = Span.subspan(0, 2);
|
||||
EXPECT_EQ(2, Subspan2AtStart.length());
|
||||
EXPECT_EQ(2, Subspan2AtStart.size());
|
||||
EXPECT_EQ(2 * ElementByteSize, Subspan2AtStart.byte_size());
|
||||
EXPECT_EQ(0, Subspan2AtStart.offset());
|
||||
EXPECT_EQ(0, Subspan2AtStart.byte_offset());
|
||||
EXPECT_FALSE(Subspan2AtStart.empty());
|
||||
|
||||
auto Subspan2AtEnd = Span.subspan(Length - 2, 2);
|
||||
EXPECT_EQ(2, Subspan2AtEnd.length());
|
||||
EXPECT_EQ(2, Subspan2AtEnd.size());
|
||||
EXPECT_EQ(2 * ElementByteSize, Subspan2AtEnd.byte_size());
|
||||
EXPECT_EQ(Length - 2, Subspan2AtEnd.offset());
|
||||
EXPECT_EQ((Length - 2) * ElementByteSize, Subspan2AtEnd.byte_offset());
|
||||
EXPECT_FALSE(Subspan2AtEnd.empty());
|
||||
|
||||
auto Subspan0AtStart = Span.subspan(0, 0);
|
||||
EXPECT_EQ(0, Subspan0AtStart.length());
|
||||
EXPECT_EQ(0, Subspan0AtStart.size());
|
||||
EXPECT_EQ(0, Subspan0AtStart.byte_size());
|
||||
EXPECT_EQ(0, Subspan0AtStart.offset());
|
||||
EXPECT_EQ(0, Subspan0AtStart.byte_offset());
|
||||
EXPECT_TRUE(Subspan0AtStart.empty());
|
||||
|
||||
auto Subspan0AtEnd = Span.subspan(Length, 0);
|
||||
EXPECT_EQ(0, Subspan0AtEnd.length());
|
||||
EXPECT_EQ(0, Subspan0AtEnd.size());
|
||||
EXPECT_EQ(0, Subspan0AtEnd.byte_size());
|
||||
EXPECT_EQ(Length, Subspan0AtEnd.offset());
|
||||
EXPECT_EQ(Length * ElementByteSize, Subspan0AtEnd.byte_offset());
|
||||
EXPECT_TRUE(Subspan0AtEnd.empty());
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, DeviceMemory) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Expected<acxxel::DeviceMemory<int>> MaybeMemory =
|
||||
Platform->mallocD<int>(10);
|
||||
EXPECT_FALSE(MaybeMemory.isError());
|
||||
|
||||
// ref
|
||||
acxxel::DeviceMemory<int> &MemoryRef = MaybeMemory.getValue();
|
||||
EXPECT_EQ(10, MemoryRef.length());
|
||||
EXPECT_EQ(10, MemoryRef.size());
|
||||
EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(MemoryRef.byte_size()));
|
||||
EXPECT_FALSE(MemoryRef.empty());
|
||||
|
||||
// mutable span
|
||||
acxxel::DeviceMemorySpan<int> MutableSpan = MemoryRef.asSpan();
|
||||
testFullDeviceMemorySpan(MutableSpan, 10, sizeof(int));
|
||||
|
||||
// const ref
|
||||
const acxxel::DeviceMemory<int> &ConstMemoryRef = MaybeMemory.getValue();
|
||||
EXPECT_EQ(10, ConstMemoryRef.length());
|
||||
EXPECT_EQ(10, ConstMemoryRef.size());
|
||||
EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(ConstMemoryRef.byte_size()));
|
||||
EXPECT_FALSE(ConstMemoryRef.empty());
|
||||
|
||||
// immutable span
|
||||
acxxel::DeviceMemorySpan<const int> ImmutableSpan = ConstMemoryRef.asSpan();
|
||||
testFullDeviceMemorySpan(ImmutableSpan, 10, sizeof(int));
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, CopyHostAndDevice) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
int A[] = {0, 1, 2};
|
||||
std::array<int, arraySize(A)> B;
|
||||
acxxel::DeviceMemory<int> X =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
Stream.syncCopyHToD(A, X);
|
||||
Stream.syncCopyDToH(X, B);
|
||||
for (size_t I = 0; I < arraySize(A); ++I)
|
||||
EXPECT_EQ(A[I], B[I]);
|
||||
EXPECT_FALSE(Stream.takeStatus().isError());
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, CopyDToD) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
int A[] = {0, 1, 2};
|
||||
std::array<int, arraySize(A)> B;
|
||||
acxxel::DeviceMemory<int> X =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
acxxel::DeviceMemory<int> Y =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
Stream.syncCopyHToD(A, X);
|
||||
Stream.syncCopyDToD(X, Y);
|
||||
Stream.syncCopyDToH(Y, B);
|
||||
for (size_t I = 0; I < arraySize(A); ++I)
|
||||
EXPECT_EQ(A[I], B[I]);
|
||||
EXPECT_FALSE(Stream.takeStatus().isError());
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, AsyncCopyHostAndDevice) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int A[] = {0, 1, 2};
|
||||
std::array<int, arraySize(A)> B;
|
||||
acxxel::DeviceMemory<int> X =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
acxxel::AsyncHostMemory<int> AsyncA =
|
||||
Platform->registerHostMem(A).takeValue();
|
||||
acxxel::AsyncHostMemory<int> AsyncB =
|
||||
Platform->registerHostMem(B).takeValue();
|
||||
EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncB).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
for (size_t I = 0; I < arraySize(A); ++I)
|
||||
EXPECT_EQ(A[I], B[I]);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, AsyncMemsetD) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
constexpr size_t ArrayLength = 10;
|
||||
std::array<uint32_t, ArrayLength> Host;
|
||||
acxxel::DeviceMemory<uint32_t> X =
|
||||
Platform->mallocD<uint32_t>(ArrayLength).takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
acxxel::AsyncHostMemory<uint32_t> AsyncHost =
|
||||
Platform->registerHostMem(Host).takeValue();
|
||||
EXPECT_FALSE(Stream.asyncMemsetD(X, 0x12).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncHost).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
for (size_t I = 0; I < ArrayLength; ++I)
|
||||
EXPECT_EQ(0x12121212u, Host[I]);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, RegisterHostMem) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
auto Data = std::unique_ptr<int[]>(new int[3]);
|
||||
acxxel::Expected<acxxel::AsyncHostMemory<const int>> MaybeAsyncHostMemory =
|
||||
Platform->registerHostMem<int>({Data.get(), 3});
|
||||
EXPECT_FALSE(MaybeAsyncHostMemory.isError())
|
||||
<< MaybeAsyncHostMemory.getError().getMessage();
|
||||
acxxel::AsyncHostMemory<const int> AsyncHostMemory =
|
||||
MaybeAsyncHostMemory.takeValue();
|
||||
EXPECT_EQ(Data.get(), AsyncHostMemory.data());
|
||||
EXPECT_EQ(3, AsyncHostMemory.size());
|
||||
}
|
||||
|
||||
struct RefCounter {
|
||||
static int Count;
|
||||
|
||||
RefCounter() { ++Count; }
|
||||
~RefCounter() { --Count; }
|
||||
RefCounter(const RefCounter &) = delete;
|
||||
RefCounter &operator=(const RefCounter &) = delete;
|
||||
};
|
||||
|
||||
int RefCounter::Count;
|
||||
|
||||
TEST_P(AcxxelTest, OwnedAsyncHost) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
RefCounter::Count = 0;
|
||||
{
|
||||
acxxel::OwnedAsyncHostMemory<RefCounter> A =
|
||||
Platform->newAsyncHostMem<RefCounter>(3).takeValue();
|
||||
EXPECT_EQ(3, RefCounter::Count);
|
||||
}
|
||||
EXPECT_EQ(0, RefCounter::Count);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, OwnedAsyncCopyHostAndDevice) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
size_t Length = 3;
|
||||
acxxel::OwnedAsyncHostMemory<int> A =
|
||||
Platform->newAsyncHostMem<int>(Length).takeValue();
|
||||
for (size_t I = 0; I < Length; ++I)
|
||||
A[I] = I;
|
||||
acxxel::OwnedAsyncHostMemory<int> B =
|
||||
Platform->newAsyncHostMem<int>(Length).takeValue();
|
||||
acxxel::DeviceMemory<int> X = Platform->mallocD<int>(Length).takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
EXPECT_FALSE(Stream.asyncCopyHToD(A, X).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.asyncCopyDToH(X, B).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
for (size_t I = 0; I < Length; ++I)
|
||||
EXPECT_EQ(A[I], B[I]);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, AsyncCopyDToD) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int A[] = {0, 1, 2};
|
||||
std::array<int, arraySize(A)> B;
|
||||
acxxel::DeviceMemory<int> X =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
acxxel::DeviceMemory<int> Y =
|
||||
Platform->mallocD<int>(arraySize(A)).takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
acxxel::AsyncHostMemory<int> AsyncA =
|
||||
Platform->registerHostMem(A).takeValue();
|
||||
acxxel::AsyncHostMemory<int> AsyncB =
|
||||
Platform->registerHostMem(B).takeValue();
|
||||
EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.asyncCopyDToD(X, Y).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.asyncCopyDToH(Y, AsyncB).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
for (size_t I = 0; I < arraySize(A); ++I)
|
||||
EXPECT_EQ(A[I], B[I]);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, Stream) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, Event) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Event Event = Platform->createEvent().takeValue();
|
||||
EXPECT_TRUE(Event.isDone());
|
||||
EXPECT_FALSE(Event.sync().isError());
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, RecordEventsInAStream) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
acxxel::Event Start = Platform->createEvent().takeValue();
|
||||
acxxel::Event End = Platform->createEvent().takeValue();
|
||||
EXPECT_FALSE(Stream.enqueueEvent(Start).takeStatus().isError());
|
||||
EXPECT_FALSE(Start.sync().isError());
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
EXPECT_FALSE(Stream.enqueueEvent(End).takeStatus().isError());
|
||||
EXPECT_FALSE(End.sync().isError());
|
||||
EXPECT_GT(End.getSecondsSince(Start).takeValue(), 0);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, StreamCallback) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int Value = 0;
|
||||
acxxel::Stream Stream = Platform->createStream().takeValue();
|
||||
EXPECT_FALSE(
|
||||
Stream
|
||||
.addCallback([&Value](acxxel::Stream &, const acxxel::Status &) {
|
||||
Value = 42;
|
||||
})
|
||||
.takeStatus()
|
||||
.isError());
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
EXPECT_EQ(42, Value);
|
||||
}
|
||||
|
||||
TEST_P(AcxxelTest, WaitForEventsInAStream) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
acxxel::Stream Stream0 = Platform->createStream().takeValue();
|
||||
acxxel::Stream Stream1 = Platform->createStream().takeValue();
|
||||
acxxel::Event Event0 = Platform->createEvent().takeValue();
|
||||
acxxel::Event Event1 = Platform->createEvent().takeValue();
|
||||
|
||||
// Thread loops on Stream0 until someone sets the GoFlag, then set the
|
||||
// MarkerFlag.
|
||||
|
||||
std::mutex Mutex;
|
||||
std::condition_variable ConditionVar;
|
||||
bool GoFlag = false;
|
||||
bool MarkerFlag = false;
|
||||
|
||||
EXPECT_FALSE(Stream0
|
||||
.addCallback([&Mutex, &ConditionVar, &GoFlag, &MarkerFlag](
|
||||
acxxel::Stream &, const acxxel::Status &) {
|
||||
std::unique_lock<std::mutex> Lock(Mutex);
|
||||
ConditionVar.wait(Lock,
|
||||
[&GoFlag] { return GoFlag == true; });
|
||||
MarkerFlag = true;
|
||||
})
|
||||
.takeStatus()
|
||||
.isError());
|
||||
|
||||
// Event0 can only occur after GoFlag and MarkerFlag are set.
|
||||
EXPECT_FALSE(Stream0.enqueueEvent(Event0).takeStatus().isError());
|
||||
|
||||
// Use waitOnEvent to make a callback on Stream1 wait for an event on Stream0.
|
||||
EXPECT_FALSE(Stream1.waitOnEvent(Event0).isError());
|
||||
EXPECT_FALSE(Stream1.enqueueEvent(Event1).takeStatus().isError());
|
||||
EXPECT_FALSE(Stream1
|
||||
.addCallback([&Mutex, &MarkerFlag](acxxel::Stream &,
|
||||
const acxxel::Status &) {
|
||||
std::unique_lock<std::mutex> Lock(Mutex);
|
||||
// This makes sure that this callback runs after the
|
||||
// callback on Stream0.
|
||||
EXPECT_TRUE(MarkerFlag);
|
||||
})
|
||||
.takeStatus()
|
||||
.isError());
|
||||
|
||||
// Allow the callback on Stream0 to set MarkerFlag and finish.
|
||||
{
|
||||
std::unique_lock<std::mutex> Lock(Mutex);
|
||||
GoFlag = true;
|
||||
}
|
||||
ConditionVar.notify_one();
|
||||
|
||||
// Make sure the events have finished and that Event1 did not happen before
|
||||
// Event0.
|
||||
EXPECT_FALSE(Event0.sync().isError());
|
||||
EXPECT_FALSE(Event1.sync().isError());
|
||||
EXPECT_FALSE(Stream1.sync().isError());
|
||||
}
|
||||
|
||||
#if defined(ACXXEL_ENABLE_CUDA) || defined(ACXXEL_ENABLE_OPENCL)
|
||||
INSTANTIATE_TEST_CASE_P(BothPlatformTest, AcxxelTest,
|
||||
::testing::Values(
|
||||
#ifdef ACXXEL_ENABLE_CUDA
|
||||
acxxel::getCUDAPlatform
|
||||
#ifdef ACXXEL_ENABLE_OPENCL
|
||||
,
|
||||
#endif
|
||||
#endif
|
||||
#ifdef ACXXEL_ENABLE_OPENCL
|
||||
acxxel::getOpenCLPlatform
|
||||
#endif
|
||||
));
|
||||
#endif
|
||||
|
||||
} // namespace
|
@ -1,87 +0,0 @@
|
||||
#include "acxxel.h"
|
||||
#include "config.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
|
||||
class MultiDeviceTest : public ::testing::TestWithParam<PlatformGetter> {};
|
||||
|
||||
TEST_P(MultiDeviceTest, AsyncCopy) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int DeviceCount = Platform->getDeviceCount().getValue();
|
||||
EXPECT_GT(DeviceCount, 0);
|
||||
|
||||
int Length = 3;
|
||||
auto A = std::unique_ptr<int[]>(new int[Length]);
|
||||
auto B0 = std::unique_ptr<int[]>(new int[Length]);
|
||||
auto B1 = std::unique_ptr<int[]>(new int[Length]);
|
||||
|
||||
auto ASpan = acxxel::Span<int>(A.get(), Length);
|
||||
auto B0Span = acxxel::Span<int>(B0.get(), Length);
|
||||
auto B1Span = acxxel::Span<int>(B1.get(), Length);
|
||||
|
||||
for (int I = 0; I < Length; ++I)
|
||||
A[I] = I;
|
||||
|
||||
auto AsyncA = Platform->registerHostMem(ASpan).takeValue();
|
||||
auto AsyncB0 = Platform->registerHostMem(B0Span).takeValue();
|
||||
auto AsyncB1 = Platform->registerHostMem(B1Span).takeValue();
|
||||
|
||||
acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
|
||||
acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
|
||||
auto Device0 = Platform->mallocD<int>(Length, 0).takeValue();
|
||||
auto Device1 = Platform->mallocD<int>(Length, 1).takeValue();
|
||||
|
||||
EXPECT_FALSE(Stream0.asyncCopyHToD(AsyncA, Device0, Length)
|
||||
.asyncCopyDToH(Device0, AsyncB0, Length)
|
||||
.sync()
|
||||
.isError());
|
||||
|
||||
EXPECT_FALSE(Stream1.asyncCopyHToD(AsyncA, Device1, Length)
|
||||
.asyncCopyDToH(Device1, AsyncB1, Length)
|
||||
.sync()
|
||||
.isError());
|
||||
|
||||
for (int I = 0; I < Length; ++I) {
|
||||
EXPECT_EQ(B0[I], I);
|
||||
EXPECT_EQ(B1[I], I);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(MultiDeviceTest, Events) {
|
||||
acxxel::Platform *Platform = GetParam()().takeValue();
|
||||
int DeviceCount = Platform->getDeviceCount().getValue();
|
||||
EXPECT_GT(DeviceCount, 0);
|
||||
|
||||
acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
|
||||
acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
|
||||
acxxel::Event Event0 = Platform->createEvent(0).takeValue();
|
||||
acxxel::Event Event1 = Platform->createEvent(1).takeValue();
|
||||
|
||||
EXPECT_FALSE(Stream0.enqueueEvent(Event0).sync().isError());
|
||||
EXPECT_FALSE(Stream1.enqueueEvent(Event1).sync().isError());
|
||||
|
||||
EXPECT_TRUE(Event0.isDone());
|
||||
EXPECT_TRUE(Event1.isDone());
|
||||
|
||||
EXPECT_FALSE(Event0.sync().isError());
|
||||
EXPECT_FALSE(Event1.sync().isError());
|
||||
}
|
||||
|
||||
#if defined(ACXXEL_ENABLE_CUDA) || defined(ACXXEL_ENABLE_OPENCL)
|
||||
INSTANTIATE_TEST_CASE_P(BothPlatformTest, MultiDeviceTest,
|
||||
::testing::Values(
|
||||
#ifdef ACXXEL_ENABLE_CUDA
|
||||
acxxel::getCUDAPlatform
|
||||
#ifdef ACXXEL_ENABLE_OPENCL
|
||||
,
|
||||
#endif
|
||||
#endif
|
||||
#ifdef ACXXEL_ENABLE_OPENCL
|
||||
acxxel::getOpenCLPlatform
|
||||
#endif
|
||||
));
|
||||
#endif
|
||||
|
||||
} // namespace
|
@ -1,61 +0,0 @@
|
||||
//===--- opencl_test.cpp - Tests for OpenCL and the Acxxel API ------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "acxxel.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
static const char *SaxpyKernelSource = R"(
|
||||
__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
|
||||
int I = get_global_id(0);
|
||||
if (I < N)
|
||||
X[I] = A * X[I] + Y[I];
|
||||
}
|
||||
)";
|
||||
|
||||
TEST(OpenCL, Saxpy) {
|
||||
constexpr size_t Length = 3;
|
||||
|
||||
float A = 2.f;
|
||||
std::array<float, Length> X = {{0.f, 1.f, 2.f}};
|
||||
std::array<float, Length> Y = {{3.f, 4.f, 5.f}};
|
||||
std::array<float, Length> Expected = {{3.f, 6.f, 9.f}};
|
||||
|
||||
acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
|
||||
acxxel::Stream Stream = OpenCL->createStream().takeValue();
|
||||
auto DeviceX = OpenCL->mallocD<float>(Length).takeValue();
|
||||
auto DeviceY = OpenCL->mallocD<float>(Length).takeValue();
|
||||
Stream.syncCopyHToD(X, DeviceX);
|
||||
Stream.syncCopyHToD(Y, DeviceY);
|
||||
acxxel::Program Program =
|
||||
OpenCL
|
||||
->createProgramFromSource(acxxel::Span<const char>(
|
||||
SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
|
||||
.takeValue();
|
||||
acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
|
||||
float *RawX = static_cast<float *>(DeviceX);
|
||||
float *RawY = static_cast<float *>(DeviceY);
|
||||
int IntLength = Length;
|
||||
void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
|
||||
size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
|
||||
sizeof(int)};
|
||||
EXPECT_FALSE(
|
||||
Stream.asyncKernelLaunch(Kernel, Length, Arguments, ArgumentSizes)
|
||||
.takeStatus()
|
||||
.isError());
|
||||
Stream.syncCopyDToH(DeviceX, X);
|
||||
EXPECT_FALSE(Stream.sync().isError());
|
||||
|
||||
EXPECT_EQ(X, Expected);
|
||||
}
|
||||
|
||||
} // namespace
|
@ -1,292 +0,0 @@
|
||||
//===--- span_test.cpp - Tests for the span class -------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "span.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T, size_t N> size_t arraySize(T (&)[N]) { return N; }
|
||||
|
||||
TEST(Span, NullConstruction) {
|
||||
acxxel::Span<int> Span0;
|
||||
EXPECT_EQ(nullptr, Span0.data());
|
||||
EXPECT_EQ(0, Span0.size());
|
||||
|
||||
acxxel::Span<int> Span1(nullptr);
|
||||
EXPECT_EQ(nullptr, Span1.data());
|
||||
EXPECT_EQ(0, Span1.size());
|
||||
}
|
||||
|
||||
TEST(Span, PtrSizeConstruction) {
|
||||
int ZeroSize = 0;
|
||||
acxxel::Span<int> Span0(nullptr, ZeroSize);
|
||||
EXPECT_EQ(Span0.data(), nullptr);
|
||||
EXPECT_EQ(Span0.size(), 0);
|
||||
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span1(Values, arraySize(Values));
|
||||
EXPECT_EQ(Span1.data(), Values);
|
||||
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
|
||||
|
||||
acxxel::Span<int> Span2(Values, ZeroSize);
|
||||
EXPECT_EQ(Span2.data(), Values);
|
||||
EXPECT_EQ(Span2.size(), 0);
|
||||
}
|
||||
|
||||
TEST(Span, PtrSizeConstruction_NegativeCount) {
|
||||
int Values[] = {0, 1, 2};
|
||||
EXPECT_DEATH(acxxel::Span<int> Span0(Values, -1), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, PtrSizeConstruction_NullptrNonzeroSize) {
|
||||
EXPECT_DEATH(acxxel::Span<int> Span0(nullptr, 1), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, FirstLastConstruction) {
|
||||
int Values[] = {0, 1, 2};
|
||||
|
||||
acxxel::Span<int> Span0(Values, Values);
|
||||
EXPECT_EQ(Span0.data(), Values);
|
||||
EXPECT_EQ(Span0.size(), 0);
|
||||
|
||||
acxxel::Span<int> Span(Values, Values + 2);
|
||||
EXPECT_EQ(Span.data(), Values);
|
||||
EXPECT_EQ(Span.size(), 2);
|
||||
}
|
||||
|
||||
TEST(Span, FirstLastConstruction_LastBeforeFirst) {
|
||||
int Values[] = {0, 1, 2};
|
||||
EXPECT_DEATH(acxxel::Span<int> Span(Values + 2, Values), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, ArrayConstruction) {
|
||||
int Array[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Array);
|
||||
EXPECT_EQ(Span.data(), Array);
|
||||
EXPECT_EQ(Span.size(), 3);
|
||||
}
|
||||
|
||||
TEST(Span, StdArrayConstruction) {
|
||||
std::array<int, 3> Array{{0, 1, 2}};
|
||||
acxxel::Span<int> Span(Array);
|
||||
EXPECT_EQ(Span.data(), Array.data());
|
||||
EXPECT_EQ(static_cast<size_t>(Span.size()), Array.size());
|
||||
|
||||
std::array<const int, 3> ConstArray{{0, 1, 2}};
|
||||
acxxel::Span<const int> ConstSpan(ConstArray);
|
||||
EXPECT_EQ(ConstSpan.data(), ConstArray.data());
|
||||
EXPECT_EQ(static_cast<size_t>(ConstSpan.size()), ConstArray.size());
|
||||
}
|
||||
|
||||
TEST(Span, ContainerConstruction) {
|
||||
std::vector<int> Vector = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Vector);
|
||||
EXPECT_EQ(Span.data(), &Vector[0]);
|
||||
EXPECT_EQ(static_cast<size_t>(Span.size()), Vector.size());
|
||||
}
|
||||
|
||||
TEST(Span, CopyConstruction) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span0(Values);
|
||||
acxxel::Span<int> Span1(Span0);
|
||||
EXPECT_EQ(Span1.data(), Values);
|
||||
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
|
||||
}
|
||||
|
||||
TEST(Span, CopyAssignment) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span0(Values);
|
||||
acxxel::Span<int> Span1;
|
||||
Span1 = Span0;
|
||||
EXPECT_EQ(Span1.data(), Values);
|
||||
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
|
||||
}
|
||||
|
||||
TEST(Span, CopyConstFromNonConst) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span0(Values);
|
||||
acxxel::Span<const int> Span1(Span0);
|
||||
EXPECT_EQ(Span1.data(), Values);
|
||||
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
|
||||
}
|
||||
|
||||
TEST(Span, FirstMethod) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
acxxel::Span<int> Span0 = Span.first(0);
|
||||
acxxel::Span<int> Span1 = Span.first(1);
|
||||
acxxel::Span<int> Span2 = Span.first(2);
|
||||
acxxel::Span<int> Span3 = Span.first(3);
|
||||
|
||||
EXPECT_EQ(Span0.data(), Values);
|
||||
EXPECT_EQ(Span1.data(), Values);
|
||||
EXPECT_EQ(Span2.data(), Values);
|
||||
EXPECT_EQ(Span3.data(), Values);
|
||||
|
||||
EXPECT_TRUE(Span0.empty());
|
||||
|
||||
EXPECT_THAT(Span1, ::testing::ElementsAre(0));
|
||||
EXPECT_THAT(Span2, ::testing::ElementsAre(0, 1));
|
||||
EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
|
||||
}
|
||||
|
||||
TEST(Span, FirstMethod_IllegalArguments) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
|
||||
EXPECT_DEATH(Span.first(-1), "terminate");
|
||||
EXPECT_DEATH(Span.first(4), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, LastMethod) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
acxxel::Span<int> Span0 = Span.last(0);
|
||||
acxxel::Span<int> Span1 = Span.last(1);
|
||||
acxxel::Span<int> Span2 = Span.last(2);
|
||||
acxxel::Span<int> Span3 = Span.last(3);
|
||||
|
||||
EXPECT_EQ(Span0.data(), Values);
|
||||
EXPECT_EQ(Span1.data(), Values + 2);
|
||||
EXPECT_EQ(Span2.data(), Values + 1);
|
||||
EXPECT_EQ(Span3.data(), Values);
|
||||
|
||||
EXPECT_TRUE(Span0.empty());
|
||||
|
||||
EXPECT_THAT(Span1, ::testing::ElementsAre(2));
|
||||
EXPECT_THAT(Span2, ::testing::ElementsAre(1, 2));
|
||||
EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
|
||||
}
|
||||
|
||||
TEST(Span, LastMethod_IllegalArguments) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
|
||||
EXPECT_DEATH(Span.last(-1), "terminate");
|
||||
EXPECT_DEATH(Span.last(4), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, SubspanMethod) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
|
||||
acxxel::Span<int> Span0 = Span.subspan(0);
|
||||
acxxel::Span<int> Span0e = Span.subspan(0, acxxel::dynamic_extent);
|
||||
acxxel::Span<int> Span00 = Span.subspan(0, 0);
|
||||
acxxel::Span<int> Span01 = Span.subspan(0, 1);
|
||||
acxxel::Span<int> Span02 = Span.subspan(0, 2);
|
||||
acxxel::Span<int> Span03 = Span.subspan(0, 3);
|
||||
|
||||
acxxel::Span<int> Span1 = Span.subspan(1);
|
||||
acxxel::Span<int> Span1e = Span.subspan(1, acxxel::dynamic_extent);
|
||||
acxxel::Span<int> Span10 = Span.subspan(1, 0);
|
||||
acxxel::Span<int> Span11 = Span.subspan(1, 1);
|
||||
acxxel::Span<int> Span12 = Span.subspan(1, 2);
|
||||
|
||||
acxxel::Span<int> Span2 = Span.subspan(2);
|
||||
acxxel::Span<int> Span2e = Span.subspan(2, acxxel::dynamic_extent);
|
||||
acxxel::Span<int> Span20 = Span.subspan(2, 0);
|
||||
acxxel::Span<int> Span21 = Span.subspan(2, 1);
|
||||
|
||||
acxxel::Span<int> Span3 = Span.subspan(3);
|
||||
acxxel::Span<int> Span3e = Span.subspan(3, acxxel::dynamic_extent);
|
||||
acxxel::Span<int> Span30 = Span.subspan(3, 0);
|
||||
|
||||
EXPECT_EQ(Span0.data(), Values);
|
||||
EXPECT_EQ(Span0e.data(), Values);
|
||||
EXPECT_EQ(Span00.data(), Values);
|
||||
EXPECT_EQ(Span01.data(), Values);
|
||||
EXPECT_EQ(Span02.data(), Values);
|
||||
EXPECT_EQ(Span03.data(), Values);
|
||||
|
||||
EXPECT_EQ(Span1.data(), Values + 1);
|
||||
EXPECT_EQ(Span1e.data(), Values + 1);
|
||||
EXPECT_EQ(Span10.data(), Values + 1);
|
||||
EXPECT_EQ(Span11.data(), Values + 1);
|
||||
EXPECT_EQ(Span12.data(), Values + 1);
|
||||
|
||||
EXPECT_EQ(Span2.data(), Values + 2);
|
||||
EXPECT_EQ(Span2e.data(), Values + 2);
|
||||
EXPECT_EQ(Span20.data(), Values + 2);
|
||||
EXPECT_EQ(Span21.data(), Values + 2);
|
||||
|
||||
EXPECT_EQ(Span3.data(), Values + 3);
|
||||
EXPECT_EQ(Span3e.data(), Values + 3);
|
||||
EXPECT_EQ(Span30.data(), Values + 3);
|
||||
|
||||
EXPECT_TRUE(Span00.empty());
|
||||
EXPECT_TRUE(Span10.empty());
|
||||
EXPECT_TRUE(Span20.empty());
|
||||
EXPECT_TRUE(Span30.empty());
|
||||
|
||||
EXPECT_THAT(Span0, ::testing::ElementsAre(0, 1, 2));
|
||||
EXPECT_THAT(Span0e, ::testing::ElementsAre(0, 1, 2));
|
||||
EXPECT_THAT(Span01, ::testing::ElementsAre(0));
|
||||
EXPECT_THAT(Span02, ::testing::ElementsAre(0, 1));
|
||||
EXPECT_THAT(Span03, ::testing::ElementsAre(0, 1, 2));
|
||||
|
||||
EXPECT_THAT(Span1, ::testing::ElementsAre(1, 2));
|
||||
EXPECT_THAT(Span1e, ::testing::ElementsAre(1, 2));
|
||||
EXPECT_THAT(Span11, ::testing::ElementsAre(1));
|
||||
EXPECT_THAT(Span12, ::testing::ElementsAre(1, 2));
|
||||
|
||||
EXPECT_THAT(Span2, ::testing::ElementsAre(2));
|
||||
EXPECT_THAT(Span2e, ::testing::ElementsAre(2));
|
||||
EXPECT_THAT(Span21, ::testing::ElementsAre(2));
|
||||
|
||||
EXPECT_TRUE(Span3.empty());
|
||||
EXPECT_TRUE(Span3e.empty());
|
||||
}
|
||||
|
||||
TEST(Span, SubspanMethod_IllegalArguments) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
EXPECT_DEATH(Span.subspan(-1, 0), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(0, -2), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(0, 4), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(1, 3), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(2, 2), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(3, 1), "terminate");
|
||||
EXPECT_DEATH(Span.subspan(4, 0), "terminate");
|
||||
}
|
||||
|
||||
TEST(Span, ElementAccess) {
|
||||
int Values[] = {0, 1, 2};
|
||||
acxxel::Span<int> Span(Values);
|
||||
|
||||
EXPECT_EQ(&Span[0], Values);
|
||||
EXPECT_EQ(&Span[1], Values + 1);
|
||||
EXPECT_EQ(&Span[2], Values + 2);
|
||||
EXPECT_EQ(&Span(0), Values);
|
||||
EXPECT_EQ(&Span(1), Values + 1);
|
||||
EXPECT_EQ(&Span(2), Values + 2);
|
||||
|
||||
Span[0] = 5;
|
||||
EXPECT_EQ(Values[0], 5);
|
||||
|
||||
Span(0) = 0;
|
||||
EXPECT_EQ(Values[0], 0);
|
||||
|
||||
const int ConstValues[] = {0, 1, 2};
|
||||
acxxel::Span<const int> ConstSpan(ConstValues);
|
||||
|
||||
EXPECT_EQ(&ConstSpan[0], ConstValues);
|
||||
EXPECT_EQ(&ConstSpan[1], ConstValues + 1);
|
||||
EXPECT_EQ(&ConstSpan[2], ConstValues + 2);
|
||||
EXPECT_EQ(&ConstSpan(0), ConstValues);
|
||||
EXPECT_EQ(&ConstSpan(1), ConstValues + 1);
|
||||
EXPECT_EQ(&ConstSpan(2), ConstValues + 2);
|
||||
}
|
||||
|
||||
} // namespace
|
@ -1,55 +0,0 @@
|
||||
//===--- status_test.cpp - Tests for the Status and Expected classes ------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "status.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace {
|
||||
|
||||
struct RefCounter {
|
||||
static int Count;
|
||||
|
||||
RefCounter() { ++Count; }
|
||||
~RefCounter() { --Count; }
|
||||
RefCounter(const RefCounter &) = delete;
|
||||
RefCounter &operator=(const RefCounter &) = delete;
|
||||
};
|
||||
|
||||
int RefCounter::Count;
|
||||
|
||||
TEST(Expected, RefCounter) {
|
||||
RefCounter::Count = 0;
|
||||
using uptr = std::unique_ptr<RefCounter>;
|
||||
|
||||
acxxel::Expected<uptr> E0(uptr(new RefCounter));
|
||||
EXPECT_FALSE(E0.isError());
|
||||
EXPECT_EQ(1, RefCounter::Count);
|
||||
|
||||
acxxel::Expected<uptr> E1(std::move(E0));
|
||||
EXPECT_FALSE(E1.isError());
|
||||
EXPECT_EQ(1, RefCounter::Count);
|
||||
|
||||
acxxel::Expected<uptr> E2(acxxel::Status("nothing in here yet"));
|
||||
EXPECT_TRUE(E2.isError());
|
||||
EXPECT_EQ(1, RefCounter::Count);
|
||||
E2 = std::move(E1);
|
||||
EXPECT_FALSE(E2.isError());
|
||||
EXPECT_EQ(1, RefCounter::Count);
|
||||
|
||||
EXPECT_EQ(1, E2.getValue()->Count);
|
||||
EXPECT_FALSE(E2.isError());
|
||||
EXPECT_EQ(1, RefCounter::Count);
|
||||
|
||||
EXPECT_EQ(1, E2.takeValue()->Count);
|
||||
EXPECT_EQ(0, RefCounter::Count);
|
||||
}
|
||||
|
||||
} // namespace
|
Loading…
x
Reference in New Issue
Block a user