Remove unused parallel-libs project

Differential Revision: https://reviews.llvm.org/D112265
David Blaikie 2021-10-21 13:40:29 -07:00
parent 458ed5fcc3
commit db0486c46f
31 changed files with 4 additions and 6984 deletions


@@ -66,8 +66,7 @@ This is an example work-flow and configuration to get and build the LLVM source:
* ``-DLLVM_ENABLE_PROJECTS='...'`` --- semicolon-separated list of the LLVM
sub-projects you'd like to additionally build. Can include any of: clang,
clang-tools-extra, compiler-rt, cross-project-tests, flang, libc, libclc,
libcxx, libcxxabi, libunwind, lld, lldb, mlir, openmp, parallel-libs,
polly, or pstl.
libcxx, libcxxabi, libunwind, lld, lldb, mlir, openmp, polly, or pstl.
For example, to build LLVM, Clang, libcxx, and libcxxabi, use
``-DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"``.
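
As a minimal illustration of the shortened list, the same selection can also be made from a CMake initial-cache script instead of ``-D`` flags; the file name enable-projects.cmake below is only a hypothetical example, passed to cmake with ``-C``:

    # enable-projects.cmake -- hypothetical initial-cache script
    # (invoke as: cmake -C enable-projects.cmake <path-to>/llvm)
    # parallel-libs is no longer a valid entry after this change.
    set(LLVM_ENABLE_PROJECTS "clang;libcxx;libcxxabi" CACHE STRING "LLVM sub-projects to build")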


@@ -64,7 +64,7 @@ endif()
# LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS
# This allows an easy way of setting up a build directory for llvm and another
# one for llvm+clang+... using the same sources.
set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;parallel-libs;polly;pstl")
set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;polly;pstl")
# The flang project is not yet part of "all" projects (see C++ requirements)
set(LLVM_EXTRA_PROJECTS "flang")
# List of all known projects in the mono repo
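
To show why the removal touches this list, here is a hedged sketch (not the actual logic in this CMakeLists.txt) of how a build can validate requested LLVM_ENABLE_PROJECTS entries against LLVM_ALL_PROJECTS and LLVM_EXTRA_PROJECTS, so a request for the deleted parallel-libs would now be rejected:

    # Sketch only: reject requested projects that are not known to the build.
    foreach(proj IN LISTS LLVM_ENABLE_PROJECTS)
      if(NOT proj IN_LIST LLVM_ALL_PROJECTS AND NOT proj IN_LIST LLVM_EXTRA_PROJECTS)
        message(FATAL_ERROR "${proj} is not a known LLVM project")
      endif()
    endforeach()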


@@ -491,7 +491,7 @@ enabled sub-projects. Nearly all of these variable names begin with
This feature allows having one build for only LLVM and another for clang+llvm
using the same source checkout.
The full list is:
``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;parallel-libs;polly;pstl``
``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;polly;pstl``
**LLVM_ENABLE_RUNTIMES**:STRING
Build libc++, libc++abi or other projects using the just-built compiler.
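
A hedged sketch of how the two variables divide the work after this change; the specific entries chosen here are illustrative only, not a recommendation:

    # Sketch of an initial-cache script: compiled tools via projects,
    # runtime libraries built with the just-built compiler via runtimes.
    set(LLVM_ENABLE_PROJECTS "clang;lld" CACHE STRING "")
    set(LLVM_ENABLE_RUNTIMES "libcxx;libcxxabi;libunwind" CACHE STRING "")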


@@ -10,7 +10,6 @@ foreach(entry ${entries})
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxxabi) AND
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite) AND
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/parallel-libs) AND
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp) AND
(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cross-project-tests))
get_filename_component(entry_name "${entry}" NAME)
@@ -41,7 +40,6 @@ if(${LLVM_BUILD_RUNTIME})
endif()
add_llvm_external_project(dragonegg)
add_llvm_external_project(parallel-libs)
add_llvm_external_project(openmp)
if(LLVM_INCLUDE_TESTS)


@@ -69,7 +69,7 @@ def CreateLLVMProjects(single_tree_checkout):
# Projects that reside inside 'projects/' in a single source tree checkout.
ORDINARY_PROJECTS = [
"compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind",
"parallel-libs", "test-suite"
"test-suite"
]
# Projects that reside inside 'tools/' in a single source tree checkout.
TOOLS_PROJECTS = ["clang", "lld", "lldb"]


@@ -1,2 +0,0 @@
BasedOnStyle: LLVM


@@ -1,17 +0,0 @@
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,readability-identifier-naming'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: lowerCase
- key: readability-identifier-naming.MemberCase
value: CamelCase
- key: readability-identifier-naming.ParameterCase
value: CamelCase
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.VariableCase
value: CamelCase


@@ -1 +0,0 @@
cmake_minimum_required(VERSION 3.13.4)


@@ -1,90 +0,0 @@
=====================================================
LLVM parallel-libs Subproject Charter
=====================================================
----------------------------------------------
Description
----------------------------------------------
The LLVM open source project will contain a subproject named `parallel-libs`
which will host the development of libraries which are aimed at enabling
parallelism in code and which are also closely tied to compiler technology.
Examples of libraries suitable for hosting within the `parallel-libs`
subproject are runtime libraries and parallel math libraries. The initial
candidates for inclusion in this subproject are **StreamExecutor** and
**libomptarget** which would live in the `streamexecutor` and `libomptarget`
subdirectories of `parallel-libs`, respectively.
The `parallel-libs` project will host a collection of libraries where each
library may be dependent on other libraries from the project or may be
completely independent of any other libraries in the project. The rationale for
hosting independent libraries within the same subproject is that all libraries
in the project are providing related functionality that lives at the
intersection of parallelism and compiler technology. It is expected that some
libraries which initially began as independent will develop dependencies over
time either between existing libraries or by extracting common code that can be
used by each. One of the purposes of this subproject is to provide a working
space where such refactoring and code sharing can take place.
Libraries in the `parallel-libs` subproject may also depend on the LLVM core
libraries. This will be useful for avoiding duplication of code within the LLVM
project for common utilities such as those found in the LLVM support library.
----------------------------------------------
Requirements
----------------------------------------------
Libraries included in the `parallel-libs` subproject must strive to achieve the
following requirements:
1. Adhere to the LLVM coding standards.
2. Use the LLVM build and test infrastructure.
3. Be released under LLVM's license.
Coding standards
----------------
Libraries in `parallel-libs` will match the LLVM coding standards. For existing
projects being checked into the subproject as-is, an exception will be made
during the initial check-in, with the understanding that the code will be
promptly updated to follow the standards. Therefore, a three month grace period
will be allowed for new libraries to meet the LLVM coding standards.
Additional exceptions to strict adherence to the LLVM coding standards may be
allowed in certain other cases, but the reasons for such exceptions must be
discussed and documented on a case-by-case basis.
LLVM build and test infrastructure
----------------------------------
Using the LLVM build and test infrastructure currently means using `cmake` for
building, `lit` for testing, and `buildbot` for automating build and testing.
This project will follow the main LLVM project conventions here and track them
as they evolve.
Each subproject library will be able to build separately without a single,
unified cmake file, but each subproject library will also be integrated into
the LLVM build so they can be built directly from the top level of the LLVM
cmake infrastructure.
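
A hedged sketch of the standalone-versus-integrated CMake idiom such a library could adopt; none of this is taken from an actual parallel-libs build file:

    # Sketch only: detect whether we are the top-level project or built from LLVM.
    if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
      project(example-parallel-lib CXX)
      # Standalone build: locate an installed LLVM to reuse its support libraries.
      find_package(LLVM REQUIRED CONFIG)
    else()
      # In-tree build: the LLVM top-level CMake files have already been processed.
      set(EXAMPLE_LIB_STANDALONE_BUILD OFF)
    endif()
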
LLVM license
------------
For simplicity, the `parallel-libs` project will use the normal LLVM license.
While some runtime libraries use a dual license scheme in LLVM, we anticipate
the project removing the need for this eventually and in the interim follow the
simpler but still permissive license. Among other things, this makes it
straightforward for these libraries to re-use core LLVM libraries where
appropriate.
----------------------------------------------
Mailing List and Bugs
----------------------------------------------
Two mailing lists will be set up for the project:
1. parallel_libs-dev@lists.llvm.org for discussions among project developers, and
2. parallel_libs-commits@lists.llvm.org for patches and commits to the project.
Each subproject library will manage its own components in Bugzilla. So, for
example, there can be several Bugzilla components for different parts of
StreamExecutor, etc.


@@ -1 +0,0 @@
BasedOnStyle: LLVM


@@ -1,17 +0,0 @@
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,readability-identifier-naming'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: camelBack
- key: readability-identifier-naming.MemberCase
value: CamelCase
- key: readability-identifier-naming.ParameterCase
value: CamelCase
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.VariableCase
value: CamelCase


@@ -1,79 +0,0 @@
cmake_minimum_required(VERSION 3.13.4)
option(ACXXEL_ENABLE_UNIT_TESTS "enable acxxel unit tests" ON)
option(ACXXEL_ENABLE_MULTI_DEVICE_UNIT_TESTS "enable acxxel multi-device unit tests" OFF)
option(ACXXEL_ENABLE_EXAMPLES "enable acxxel examples" OFF)
option(ACXXEL_ENABLE_DOXYGEN "enable Doxygen for acxxel" OFF)
option(ACXXEL_ENABLE_CUDA "enable CUDA for acxxel" ON)
option(ACXXEL_ENABLE_OPENCL "enable OpenCL for acxxel" ON)
project(acxxel)
if(ACXXEL_ENABLE_CUDA)
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
find_library(CUDA_DRIVER_LIBRARY cuda)
if(NOT CUDA_DRIVER_LIBRARY)
message(FATAL_ERROR "could not find libcuda, is the CUDA driver installed on your system?")
endif(NOT CUDA_DRIVER_LIBRARY)
set(ACXXEL_CUDA_SOURCES cuda_acxxel.cpp)
set(ACXXEL_CUDA_LIBRARIES ${CUDA_DRIVER_LIBRARY} ${CUDA_LIBRARIES})
endif(ACXXEL_ENABLE_CUDA)
if(ACXXEL_ENABLE_OPENCL)
find_package(OpenCL REQUIRED)
include_directories(${OpenCL_INCLUDE_DIRS})
set(ACXXEL_OPENCL_SOURCES opencl_acxxel.cpp)
set(ACXXEL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
endif()
configure_file(config.h.in config.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Insist on C++ 11 features.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Add warning flags.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
if (CXX_SUPPORTS_SUGGEST_OVERRIDE_FLAG)
add_compile_options("-Wno-suggest-override")
endif()
add_library(
acxxel
acxxel.cpp
${ACXXEL_CUDA_SOURCES}
${ACXXEL_OPENCL_SOURCES})
target_link_libraries(
acxxel
${ACXXEL_CUDA_LIBRARIES}
${ACXXEL_OPENCL_LIBRARIES})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
if(ACXXEL_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()
if(ACXXEL_ENABLE_UNIT_TESTS)
enable_testing()
find_package(GTest REQUIRED)
include_directories(${GTEST_INCLUDE_DIRS})
find_package(Threads)
add_subdirectory(tests)
endif()
if(ACXXEL_ENABLE_DOXYGEN)
find_package(Doxygen REQUIRED)
configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
add_custom_target(
acxxel-doc
${DOXYGEN_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
COMMENT
"Generating acxxel API documentation with Doxygen"
VERBATIM)
endif()

File diff suppressed because it is too large.


@@ -1,278 +0,0 @@
==============================================================================
The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
==============================================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---- LLVM Exceptions to the Apache 2.0 License ----
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.
==============================================================================
Software from third parties included in the LLVM Project:
==============================================================================
The LLVM Project contains third party software which is under different license
terms. All such code will be identified clearly using at least one of two
mechanisms:
1) It will be in a separate directory tree with its own `LICENSE.txt` or
`LICENSE` file at the top containing the specific license and restrictions
which apply to that software, or
2) It will contain specific license and restriction terms at the top of every
file.
==============================================================================
Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
==============================================================================
University of Illinois/NCSA
Open Source License
Copyright (c) 2007-2019 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
LLVM Team
University of Illinois at Urbana-Champaign
http://llvm.org
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* Neither the names of the LLVM Team, University of Illinois at
Urbana-Champaign, nor the names of its contributors may be used to
endorse or promote products derived from this Software without specific
prior written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.


@@ -1,111 +0,0 @@
//===--- acxxel.cpp - Implementation details for the Acxxel API -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include "config.h"
#include <algorithm>
#include <iostream>
#include <string>
namespace acxxel {
namespace cuda {
Expected<Platform *> getPlatform();
} // namespace cuda
namespace opencl {
Expected<Platform *> getPlatform();
} // namespace opencl
void logWarning(const std::string &Message) {
std::cerr << "WARNING: " << Message << "\n";
}
Expected<Platform *> getCUDAPlatform() {
#ifdef ACXXEL_ENABLE_CUDA
return cuda::getPlatform();
#else
return Status("library was build without CUDA support");
#endif
}
Expected<Platform *> getOpenCLPlatform() {
#ifdef ACXXEL_ENABLE_OPENCL
return opencl::getPlatform();
#else
return Status("library was build without OpenCL support");
#endif
}
Stream::Stream(Stream &&) noexcept = default;
Stream &Stream::operator=(Stream &&) noexcept = default;
Status Stream::sync() {
return takeStatusOr(ThePlatform->streamSync(TheHandle.get()));
}
Status Stream::waitOnEvent(Event &Event) {
return takeStatusOr(ThePlatform->streamWaitOnEvent(
TheHandle.get(), ThePlatform->getEventHandle(Event)));
}
Stream &
Stream::addCallback(std::function<void(Stream &, const Status &)> Callback) {
setStatus(ThePlatform->addStreamCallback(*this, std::move(Callback)));
return *this;
}
Stream &Stream::asyncKernelLaunch(const Kernel &TheKernel,
KernelLaunchDimensions LaunchDimensions,
Span<void *> Arguments,
Span<size_t> ArgumentSizes,
size_t SharedMemoryBytes) {
setStatus(ThePlatform->rawEnqueueKernelLaunch(
TheHandle.get(), TheKernel.TheHandle.get(), LaunchDimensions, Arguments,
ArgumentSizes, SharedMemoryBytes));
return *this;
}
Stream &Stream::enqueueEvent(Event &E) {
setStatus(ThePlatform->enqueueEvent(ThePlatform->getEventHandle(E),
TheHandle.get()));
return *this;
}
Event::Event(Event &&) noexcept = default;
Event &Event::operator=(Event &&) noexcept = default;
bool Event::isDone() { return ThePlatform->eventIsDone(TheHandle.get()); }
Status Event::sync() { return ThePlatform->eventSync(TheHandle.get()); }
Expected<float> Event::getSecondsSince(const Event &Previous) {
Expected<float> MaybeSeconds = ThePlatform->getSecondsBetweenEvents(
Previous.TheHandle.get(), TheHandle.get());
if (MaybeSeconds.isError())
MaybeSeconds.getError();
return MaybeSeconds;
}
Expected<Kernel> Program::createKernel(const std::string &Name) {
Expected<void *> MaybeKernelHandle =
ThePlatform->rawCreateKernel(TheHandle.get(), Name);
if (MaybeKernelHandle.isError())
return MaybeKernelHandle.getError();
return Kernel(ThePlatform, MaybeKernelHandle.getValue(),
ThePlatform->getKernelHandleDestructor());
}
Program::Program(Program &&) noexcept = default;
Program &Program::operator=(Program &&That) noexcept = default;
Kernel::Kernel(Kernel &&) noexcept = default;
Kernel &Kernel::operator=(Kernel &&That) noexcept = default;
} // namespace acxxel

File diff suppressed because it is too large.


@@ -1,15 +0,0 @@
//===--- config.h - Macros generated during configuration -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file declares macros that are generated during the configuration stage
/// of the build.
///
//===----------------------------------------------------------------------===//
#cmakedefine ACXXEL_ENABLE_CUDA
#cmakedefine ACXXEL_ENABLE_OPENCL


@@ -1,510 +0,0 @@
//===--- cuda_acxxel.cpp - CUDA implementation of the Acxxel API ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file defines the standard CUDA implementation of the Acxxel API.
///
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include <array>
#include <cassert>
#include <sstream>
#include <vector>
namespace acxxel {
namespace {
static std::string getCUErrorMessage(CUresult Result) {
if (!Result)
return "success";
const char *ErrorName = "UNKNOWN_ERROR_NAME";
const char *ErrorDescription = "UNKNOWN_ERROR_DESCRIPTION";
cuGetErrorName(Result, &ErrorName);
cuGetErrorString(Result, &ErrorDescription);
std::ostringstream OutStream;
OutStream << "CUDA driver error: code = " << Result
<< ", name = " << ErrorName
<< ", description = " << ErrorDescription;
return OutStream.str();
}
static Status getCUError(CUresult Result, const std::string &Message) {
if (!Result)
return Status();
std::ostringstream OutStream;
OutStream << getCUErrorMessage(Result) << ", message = " << Message;
return Status(OutStream.str());
}
static std::string getCUDAErrorMessage(cudaError_t E) {
if (!E)
return "success";
std::ostringstream OutStream;
OutStream << "CUDA runtime error: code = " << E
<< ", name = " << cudaGetErrorName(E)
<< ", description = " << cudaGetErrorString(E);
return OutStream.str();
}
static Status getCUDAError(cudaError_t E, const std::string &Message) {
if (!E)
return Status();
std::ostringstream OutStream;
OutStream << getCUDAErrorMessage(E) << ", message = " << Message;
return Status(OutStream.str());
}
static void logCUWarning(CUresult Result, const std::string &Message) {
if (Result) {
std::ostringstream OutStream;
OutStream << Message << ": " << getCUErrorMessage(Result);
logWarning(OutStream.str());
}
}
/// A CUDA Platform implementation.
class CUDAPlatform : public Platform {
public:
~CUDAPlatform() override = default;
static Expected<CUDAPlatform> create();
Expected<int> getDeviceCount() override;
Expected<Stream> createStream(int DeviceIndex) override;
Status streamSync(void *Stream) override;
Status streamWaitOnEvent(void *Stream, void *Event) override;
Expected<Event> createEvent(int DeviceIndex) override;
protected:
Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
HandleDestructor getDeviceMemoryHandleDestructor() override;
void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
size_t ByteOffset) override;
virtual void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
int DeviceIndex) override;
Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
int DeviceIndex) override;
Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
HandleDestructor getFreeHostMemoryHandleDestructor() override;
Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) override;
Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
void *HostDst, ptrdiff_t ByteCount,
void *Stream) override;
Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
void *Stream) override;
Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
ptrdiff_t ByteCount, char ByteValue,
void *Stream) override;
Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
Expected<Program> createProgramFromSource(Span<const char> Source,
int DeviceIndex) override;
Status enqueueEvent(void *Event, void *Stream) override;
bool eventIsDone(void *Event) override;
Status eventSync(void *Event) override;
Expected<float> getSecondsBetweenEvents(void *StartEvent,
void *EndEvent) override;
Expected<void *> rawCreateKernel(void *Program,
const std::string &Name) override;
HandleDestructor getKernelHandleDestructor() override;
Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
KernelLaunchDimensions LaunchDimensions,
Span<void *> Arguments,
Span<size_t> ArgumentSizes,
size_t SharedMemoryBytes) override;
private:
explicit CUDAPlatform(const std::vector<CUcontext> &Contexts)
: TheContexts(Contexts) {}
Status setContext(int DeviceIndex) {
if (DeviceIndex < 0 ||
static_cast<size_t>(DeviceIndex) >= TheContexts.size())
return Status("invalid deivce index " + std::to_string(DeviceIndex));
return getCUError(cuCtxSetCurrent(TheContexts[DeviceIndex]),
"cuCtxSetCurrent");
}
// Vector of contexts for each device.
std::vector<CUcontext> TheContexts;
};
Expected<CUDAPlatform> CUDAPlatform::create() {
std::vector<CUcontext> Contexts;
if (CUresult Result = cuInit(0))
return getCUError(Result, "cuInit");
int DeviceCount = 0;
if (CUresult Result = cuDeviceGetCount(&DeviceCount))
return getCUError(Result, "cuDeviceGetCount");
for (int I = 0; I < DeviceCount; ++I) {
CUdevice Device;
if (CUresult Result = cuDeviceGet(&Device, I))
return getCUError(Result, "cuDeviceGet");
CUcontext Context;
if (CUresult Result = cuDevicePrimaryCtxRetain(&Context, Device))
return getCUError(Result, "cuDevicePrimaryCtxRetain");
if (CUresult Result = cuCtxSetCurrent(Context))
return getCUError(Result, "cuCtxSetCurrent");
Contexts.emplace_back(Context);
}
return CUDAPlatform(Contexts);
}
Expected<int> CUDAPlatform::getDeviceCount() {
int Count = 0;
if (CUresult Result = cuDeviceGetCount(&Count))
return getCUError(Result, "cuDeviceGetCount");
return Count;
}
static void cudaDestroyStream(void *H) {
logCUWarning(cuStreamDestroy(static_cast<CUstream_st *>(H)),
"cuStreamDestroy");
}
Expected<Stream> CUDAPlatform::createStream(int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
unsigned int Flags = CU_STREAM_DEFAULT;
CUstream Handle;
if (CUresult Result = cuStreamCreate(&Handle, Flags))
return getCUError(Result, "cuStreamCreate");
return constructStream(this, DeviceIndex, Handle, cudaDestroyStream);
}
Status CUDAPlatform::streamSync(void *Stream) {
return getCUError(cuStreamSynchronize(static_cast<CUstream_st *>(Stream)),
"cuStreamSynchronize");
}
Status CUDAPlatform::streamWaitOnEvent(void *Stream, void *Event) {
// CUDA docs says flags must be 0.
unsigned int Flags = 0u;
return getCUError(cuStreamWaitEvent(static_cast<CUstream_st *>(Stream),
static_cast<CUevent_st *>(Event), Flags),
"cuStreamWaitEvent");
}
static void cudaDestroyEvent(void *H) {
logCUWarning(cuEventDestroy(static_cast<CUevent_st *>(H)), "cuEventDestroy");
}
Expected<Event> CUDAPlatform::createEvent(int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
unsigned int Flags = CU_EVENT_DEFAULT;
CUevent Handle;
if (CUresult Result = cuEventCreate(&Handle, Flags))
return getCUError(Result, "cuEventCreate");
return constructEvent(this, DeviceIndex, Handle, cudaDestroyEvent);
}
Status CUDAPlatform::enqueueEvent(void *Event, void *Stream) {
return getCUError(cuEventRecord(static_cast<CUevent_st *>(Event),
static_cast<CUstream_st *>(Stream)),
"cuEventRecord");
}
bool CUDAPlatform::eventIsDone(void *Event) {
return cuEventQuery(static_cast<CUevent_st *>(Event)) != CUDA_ERROR_NOT_READY;
}
Status CUDAPlatform::eventSync(void *Event) {
return getCUError(cuEventSynchronize(static_cast<CUevent_st *>(Event)),
"cuEventSynchronize");
}
Expected<float> CUDAPlatform::getSecondsBetweenEvents(void *StartEvent,
void *EndEvent) {
float Milliseconds;
if (CUresult Result = cuEventElapsedTime(
&Milliseconds, static_cast<CUevent_st *>(StartEvent),
static_cast<CUevent_st *>(EndEvent)))
return getCUError(Result, "cuEventElapsedTime");
return Milliseconds * 1e-6;
}
Expected<void *> CUDAPlatform::rawMallocD(ptrdiff_t ByteCount,
int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
if (!ByteCount)
return nullptr;
CUdeviceptr Pointer;
if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
return getCUError(Result, "cuMemAlloc");
return reinterpret_cast<void *>(Pointer);
}
static void cudaDestroyDeviceMemory(void *H) {
logCUWarning(cuMemFree(reinterpret_cast<CUdeviceptr>(H)), "cuMemFree");
}
HandleDestructor CUDAPlatform::getDeviceMemoryHandleDestructor() {
return cudaDestroyDeviceMemory;
}
void *CUDAPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t,
size_t ByteOffset) {
return static_cast<char *>(BaseHandle) + ByteOffset;
}
void CUDAPlatform::rawDestroyDeviceMemorySpanHandle(void *) {
// Do nothing for this platform.
}
Expected<void *> CUDAPlatform::rawGetDeviceSymbolAddress(const void *Symbol,
int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
void *Address;
if (cudaError_t Status = cudaGetSymbolAddress(&Address, Symbol))
return getCUDAError(Status, "cudaGetSymbolAddress");
return Address;
}
Expected<ptrdiff_t> CUDAPlatform::rawGetDeviceSymbolSize(const void *Symbol,
int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
size_t Size;
if (cudaError_t Status = cudaGetSymbolSize(&Size, Symbol))
return getCUDAError(Status, "cudaGetSymbolSize");
return Size;
}
static const void *offsetVoidPtr(const void *Ptr, ptrdiff_t ByteOffset) {
return static_cast<const void *>(static_cast<const char *>(Ptr) + ByteOffset);
}
static void *offsetVoidPtr(void *Ptr, ptrdiff_t ByteOffset) {
return static_cast<void *>(static_cast<char *>(Ptr) + ByteOffset);
}
Status CUDAPlatform::rawRegisterHostMem(const void *Memory,
ptrdiff_t ByteCount) {
unsigned int Flags = 0;
return getCUError(
cuMemHostRegister(const_cast<void *>(Memory), ByteCount, Flags),
"cuMemHostRegiser");
}
static void cudaUnregisterHostMemoryHandleDestructor(void *H) {
logCUWarning(cuMemHostUnregister(H), "cuMemHostUnregister");
}
HandleDestructor CUDAPlatform::getUnregisterHostMemoryHandleDestructor() {
return cudaUnregisterHostMemoryHandleDestructor;
}
Expected<void *> CUDAPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
unsigned int Flags = 0;
void *Memory;
if (CUresult Result = cuMemHostAlloc(&Memory, ByteCount, Flags))
return getCUError(Result, "cuMemHostAlloc");
return Memory;
}
static void cudaFreeHostMemoryHandleDestructor(void *H) {
logCUWarning(cuMemFreeHost(H), "cuMemFreeHost");
}
HandleDestructor CUDAPlatform::getFreeHostMemoryHandleDestructor() {
return cudaFreeHostMemoryHandleDestructor;
}
Status CUDAPlatform::asyncCopyDToD(const void *DeviceSrc,
ptrdiff_t DeviceSrcByteOffset,
void *DeviceDst,
ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) {
return getCUError(
cuMemcpyDtoDAsync(reinterpret_cast<CUdeviceptr>(
offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
reinterpret_cast<CUdeviceptr>(
offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
ByteCount, static_cast<CUstream_st *>(Stream)),
"cuMemcpyDtoDAsync");
}
Status CUDAPlatform::asyncCopyDToH(const void *DeviceSrc,
ptrdiff_t DeviceSrcByteOffset, void *HostDst,
ptrdiff_t ByteCount, void *Stream) {
return getCUError(
cuMemcpyDtoHAsync(HostDst, reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
DeviceSrc, DeviceSrcByteOffset)),
ByteCount, static_cast<CUstream_st *>(Stream)),
"cuMemcpyDtoHAsync");
}
Status CUDAPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) {
return getCUError(
cuMemcpyHtoDAsync(reinterpret_cast<CUdeviceptr>(
offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
HostSrc, ByteCount, static_cast<CUstream_st *>(Stream)),
"cuMemcpyHtoDAsync");
}
Status CUDAPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
ptrdiff_t ByteCount, char ByteValue,
void *Stream) {
return getCUError(
cuMemsetD8Async(
reinterpret_cast<CUdeviceptr>(offsetVoidPtr(DeviceDst, ByteOffset)),
ByteValue, ByteCount, static_cast<CUstream_st *>(Stream)),
"cuMemsetD8Async");
}
struct StreamCallbackUserData {
StreamCallbackUserData(Stream &Stream, StreamCallback Function)
: TheStream(Stream), TheFunction(std::move(Function)) {}
Stream &TheStream;
StreamCallback TheFunction;
};
static void CUDA_CB cuStreamCallbackShim(CUstream HStream, CUresult Status,
void *UserData) {
std::unique_ptr<StreamCallbackUserData> Data(
static_cast<StreamCallbackUserData *>(UserData));
Stream &TheStream = Data->TheStream;
assert(static_cast<CUstream_st *>(TheStream) == HStream);
Data->TheFunction(TheStream,
getCUError(Status, "stream callback error state"));
}
Status CUDAPlatform::addStreamCallback(Stream &Stream,
StreamCallback Callback) {
// CUDA docs say flags must always be 0 here.
unsigned int Flags = 0u;
std::unique_ptr<StreamCallbackUserData> UserData(
new StreamCallbackUserData(Stream, std::move(Callback)));
return getCUError(cuStreamAddCallback(Stream, cuStreamCallbackShim,
UserData.release(), Flags),
"cuStreamAddCallback");
}
static void cudaDestroyProgram(void *H) {
logCUWarning(cuModuleUnload(static_cast<CUmod_st *>(H)), "cuModuleUnload");
}
Expected<Program> CUDAPlatform::createProgramFromSource(Span<const char> Source,
int DeviceIndex) {
Status S = setContext(DeviceIndex);
if (S.isError())
return S;
CUmodule Module;
constexpr int LogBufferSizeBytes = 1024;
char InfoLogBuffer[LogBufferSizeBytes];
char ErrorLogBuffer[LogBufferSizeBytes];
constexpr size_t OptionsCount = 4;
std::array<CUjit_option, OptionsCount> OptionNames = {
{CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}};
std::array<void *, OptionsCount> OptionValues = {
{InfoLogBuffer, const_cast<int *>(&LogBufferSizeBytes), ErrorLogBuffer,
const_cast<int *>(&LogBufferSizeBytes)}};
if (CUresult Result =
cuModuleLoadDataEx(&Module, Source.data(), OptionsCount,
OptionNames.data(), OptionValues.data())) {
InfoLogBuffer[LogBufferSizeBytes - 1] = '\0';
ErrorLogBuffer[LogBufferSizeBytes - 1] = '\0';
std::ostringstream OutStream;
OutStream << "Error creating program from source: "
<< getCUErrorMessage(Result)
<< "\nINFO MESSAGES\n================\n"
<< InfoLogBuffer << "\nERROR MESSAGES\n==================\n"
<< ErrorLogBuffer;
return Status(OutStream.str());
}
return constructProgram(this, Module, cudaDestroyProgram);
}
Expected<void *> CUDAPlatform::rawCreateKernel(void *Program,
const std::string &Name) {
CUmodule Module = static_cast<CUmodule>(Program);
CUfunction Kernel;
if (CUresult Result = cuModuleGetFunction(&Kernel, Module, Name.c_str()))
return getCUError(Result, "cuModuleGetFunction");
return Kernel;
}
static void cudaDestroyKernel(void *) {
// Do nothing.
}
HandleDestructor CUDAPlatform::getKernelHandleDestructor() {
return cudaDestroyKernel;
}
Status CUDAPlatform::rawEnqueueKernelLaunch(
void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
Span<void *> Arguments, Span<size_t>, size_t SharedMemoryBytes) {
return getCUError(
cuLaunchKernel(static_cast<CUfunction>(Kernel), LaunchDimensions.GridX,
LaunchDimensions.GridY, LaunchDimensions.GridZ,
LaunchDimensions.BlockX, LaunchDimensions.BlockY,
LaunchDimensions.BlockZ, SharedMemoryBytes,
static_cast<CUstream>(Stream), Arguments.data(), nullptr),
"cuLaunchKernel");
}
} // namespace
namespace cuda {
/// Gets the CUDAPlatform instance and returns it as an unowned pointer to a
/// Platform.
Expected<Platform *> getPlatform() {
static auto MaybePlatform = []() -> Expected<CUDAPlatform *> {
Expected<CUDAPlatform> CreationResult = CUDAPlatform::create();
if (CreationResult.isError())
return CreationResult.getError();
else
return new CUDAPlatform(CreationResult.takeValue());
}();
return MaybePlatform;
}
} // namespace cuda
} // namespace acxxel


@@ -1,20 +0,0 @@
body {
background-color: #e0e0eb;
}
div.header {
margin-left: auto;
margin-right: auto;
max-width: 60em;
padding-left: 2em;
padding-right: 2em;
}
div.contents {
margin-left: auto;
margin-right: auto;
max-width: 60em;
background-color: white;
padding: 2em;
border-radius: 1em;
}


@@ -1,12 +0,0 @@
set(CUDA_HOST_COMPILER gcc)
set(CUDA_NVCC_FLAGS -std=c++11)
if(ACXXEL_ENABLE_CUDA)
cuda_add_executable(simple_example simple_example.cu)
target_link_libraries(simple_example acxxel)
endif()
if(ACXXEL_ENABLE_OPENCL)
add_executable(opencl_example opencl_example.cpp)
target_link_libraries(opencl_example acxxel ${OpenCL_LIBRARIES})
endif()


@@ -1,69 +0,0 @@
//===--- opencl_example.cpp - Example of using Acxxel with OpenCL ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file is an example of using OpenCL with Acxxel.
///
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include <array>
#include <cstdio>
#include <cstring>
static const char *SaxpyKernelSource = R"(
__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
int I = get_global_id(0);
if (I < N)
X[I] = A * X[I] + Y[I];
}
)";
template <size_t N>
void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
acxxel::Stream Stream = OpenCL->createStream().takeValue();
auto DeviceX = OpenCL->mallocD<float>(N).takeValue();
auto DeviceY = OpenCL->mallocD<float>(N).takeValue();
Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
acxxel::Program Program =
OpenCL
->createProgramFromSource(acxxel::Span<const char>(
SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
.takeValue();
acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
float *RawX = static_cast<float *>(DeviceX);
float *RawY = static_cast<float *>(DeviceY);
int IntLength = N;
void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
sizeof(int)};
acxxel::Status Status =
Stream.asyncKernelLaunch(Kernel, N, Arguments, ArgumentSizes)
.syncCopyDToH(DeviceX, X)
.sync();
if (Status.isError()) {
std::fprintf(stderr, "Error during saxpy: %s\n",
Status.getMessage().c_str());
std::exit(EXIT_FAILURE);
}
}
int main() {
float A = 2.f;
std::array<float, 3> X{{0.f, 1.f, 2.f}};
std::array<float, 3> Y{{3.f, 4.f, 5.f}};
std::array<float, 3> Expected{{3.f, 6.f, 9.f}};
saxpy(A, X, Y);
for (int I = 0; I < 3; ++I)
if (X[I] != Expected[I]) {
std::fprintf(stderr, "Mismatch at position %d, %f != %f\n", I, X[I],
Expected[I]);
std::exit(EXIT_FAILURE);
}
}


@@ -1,109 +0,0 @@
//===--- simple_example.cu - Simple example of using Acxxel ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file is a simple example of using Acxxel.
///
//===----------------------------------------------------------------------===//
/// [Example simple saxpy]
#include "acxxel.h"
#include <array>
#include <cstdio>
#include <cstdlib>
// A standard CUDA kernel.
__global__ void saxpyKernel(float A, float *X, float *Y, int N) {
int I = (blockDim.x * blockIdx.x) + threadIdx.x;
if (I < N)
X[I] = A * X[I] + Y[I];
}
// A host library wrapping the CUDA kernel. All Acxxel calls are in here.
template <size_t N>
void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
// Get the CUDA platform and make a CUDA stream.
acxxel::Platform *CUDA = acxxel::getCUDAPlatform().getValue();
acxxel::Stream Stream = CUDA->createStream().takeValue();
// Allocate space for device arrays.
auto DeviceX = CUDA->mallocD<float>(N).takeValue();
auto DeviceY = CUDA->mallocD<float>(N).takeValue();
// Copy X and Y out to the device.
Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
// Launch the kernel using triple-chevron notation.
saxpyKernel<<<1, N, 0, Stream>>>(A, DeviceX, DeviceY, N);
// Copy the results back to the host.
acxxel::Status Status = Stream.syncCopyDToH(DeviceX, X).takeStatus();
// Check for any errors.
if (Status.isError()) {
std::fprintf(stderr, "Error performing acxxel saxpy: %s\n",
Status.getMessage().c_str());
std::exit(EXIT_FAILURE);
}
}
/// [Example simple saxpy]
/// [Example CUDA simple saxpy]
template <size_t N>
void cudaSaxpy(float A, std::array<float, N> &X, std::array<float, N> &Y) {
// This size is needed all over the place, so give it a name.
constexpr size_t Size = N * sizeof(float);
// Allocate space for device arrays.
float *DeviceX;
float *DeviceY;
cudaMalloc(&DeviceX, Size);
cudaMalloc(&DeviceY, Size);
// Copy X and Y out to the device.
cudaMemcpy(DeviceX, X.data(), Size, cudaMemcpyHostToDevice);
cudaMemcpy(DeviceY, Y.data(), Size, cudaMemcpyHostToDevice);
// Launch the kernel using triple-chevron notation.
saxpyKernel<<<1, N>>>(A, DeviceX, DeviceY, N);
// Copy the results back to the host.
cudaMemcpy(X.data(), DeviceX, Size, cudaMemcpyDeviceToHost);
// Free resources.
cudaFree(DeviceX);
cudaFree(DeviceY);
// Check for any errors.
cudaError_t Error = cudaGetLastError();
if (Error) {
std::fprintf(stderr, "Error performing cudart saxpy: %s\n",
cudaGetErrorString(Error));
std::exit(EXIT_FAILURE);
}
}
/// [Example CUDA simple saxpy]
template <typename F> void testSaxpy(F &&SaxpyFunction) {
float A = 2.f;
std::array<float, 3> X = {{0.f, 1.f, 2.f}};
std::array<float, 3> Y = {{3.f, 4.f, 5.f}};
std::array<float, 3> Expected = {{3.f, 6.f, 9.f}};
SaxpyFunction(A, X, Y);
for (int I = 0; I < 3; ++I)
if (X[I] != Expected[I]) {
std::fprintf(stderr, "Result mismatch at index %d, %f != %f\n", I, X[I],
Expected[I]);
std::exit(EXIT_FAILURE);
}
}
int main() {
testSaxpy(saxpy<3>);
testSaxpy(cudaSaxpy<3>);
}


@@ -1,550 +0,0 @@
//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file defines the standard OpenCL implementation of the Acxxel API.
///
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include "CL/cl.h"
#include <mutex>
#include <sstream>
#include <utility>
#include <vector>
namespace acxxel {
namespace {
/// An ID containing the platform ID and the device ID within the platform.
struct FullDeviceID {
cl_platform_id PlatformID;
cl_device_id DeviceID;
FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
: PlatformID(PlatformID), DeviceID(DeviceID) {}
};
static std::string getOpenCLErrorMessage(cl_int Result) {
if (!Result)
return "success";
std::ostringstream OutStream;
OutStream << "OpenCL error: code = " << Result;
return OutStream.str();
}
static Status getOpenCLError(cl_int Result, const std::string &Message) {
if (!Result)
return Status();
std::ostringstream OutStream;
OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
return Status(OutStream.str());
}
static void logOpenCLWarning(cl_int Result, const std::string &Message) {
if (Result) {
std::ostringstream OutStream;
OutStream << Message << ": " << getOpenCLErrorMessage(Result);
logWarning(OutStream.str());
}
}
class OpenCLPlatform : public Platform {
public:
~OpenCLPlatform() override = default;
static Expected<OpenCLPlatform> create();
Expected<int> getDeviceCount() override;
Expected<Stream> createStream(int DeviceIndex) override;
Expected<Event> createEvent(int DeviceIndex) override;
Expected<Program> createProgramFromSource(Span<const char> Source,
int DeviceIndex) override;
protected:
Status streamSync(void *Stream) override;
Status streamWaitOnEvent(void *Stream, void *Event) override;
Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
HandleDestructor getDeviceMemoryHandleDestructor() override;
void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
size_t ByteOffset) override;
void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
int DeviceIndex) override;
Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
int DeviceIndex) override;
Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
HandleDestructor getFreeHostMemoryHandleDestructor() override;
Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) override;
Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
void *HostDst, ptrdiff_t ByteCount,
void *Stream) override;
Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
void *Stream) override;
Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
ptrdiff_t ByteCount, char ByteValue,
void *Stream) override;
Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
Status enqueueEvent(void *Event, void *Stream) override;
bool eventIsDone(void *Event) override;
Status eventSync(void *Event) override;
Expected<float> getSecondsBetweenEvents(void *StartEvent,
void *EndEvent) override;
Expected<void *> rawCreateKernel(void *Program,
const std::string &Name) override;
HandleDestructor getKernelHandleDestructor() override;
Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
KernelLaunchDimensions LaunchDimensions,
Span<void *> Arguments,
Span<size_t> ArgumentSizes,
size_t SharedMemoryBytes) override;
private:
OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
std::vector<cl_context> &&Contexts,
std::vector<cl_command_queue> &&CommandQueues)
: FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
CommandQueues(std::move(CommandQueues)) {}
std::vector<FullDeviceID> FullDeviceIDs;
std::vector<cl_context> Contexts;
std::vector<cl_command_queue> CommandQueues;
};
Expected<OpenCLPlatform> OpenCLPlatform::create() {
constexpr cl_uint MaxNumEntries = 100;
cl_platform_id Platforms[MaxNumEntries];
cl_uint NumPlatforms;
if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
return getOpenCLError(Result, "clGetPlatformIDs");
std::vector<FullDeviceID> FullDeviceIDs;
for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
++PlatformIndex) {
cl_uint NumDevices;
cl_device_id Devices[MaxNumEntries];
if (cl_int Result =
clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
MaxNumEntries, Devices, &NumDevices))
return getOpenCLError(Result, "clGetDeviceIDs");
for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
Devices[DeviceIndex]);
}
if (FullDeviceIDs.empty())
return Status("No OpenCL device available on this system.");
std::vector<cl_context> Contexts(FullDeviceIDs.size());
std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
cl_int CreateContextResult;
Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
nullptr, nullptr, &CreateContextResult);
if (CreateContextResult)
return getOpenCLError(CreateContextResult, "clCreateContext");
cl_int CreateCommandQueueResult;
CommandQueues[I] = clCreateCommandQueue(
Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
&CreateCommandQueueResult);
if (CreateCommandQueueResult)
return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
}
return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
std::move(CommandQueues));
}
Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
static void openCLDestroyStream(void *H) {
logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
"clReleaseCommandQueue");
}
Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
cl_int Result;
cl_command_queue Queue = clCreateCommandQueue(
Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
CL_QUEUE_PROFILING_ENABLE, &Result);
if (Result)
return getOpenCLError(Result, "clCreateCommandQueue");
return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
}
static void openCLEventDestroy(void *H) {
cl_event *CLEvent = static_cast<cl_event *>(H);
logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
delete CLEvent;
}
Status OpenCLPlatform::streamSync(void *Stream) {
return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
"clFinish");
}
Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
cl_event *CLEvent = static_cast<cl_event *>(Event);
return getOpenCLError(
clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
CLEvent, nullptr),
"clEnqueueMarkerWithWaitList");
}
Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
cl_int Result;
cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
if (Result)
return getOpenCLError(Result, "clCreateUserEvent");
if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
return getOpenCLError(Result, "clSetUserEventStatus");
return constructEvent(this, DeviceIndex, new cl_event(Event),
openCLEventDestroy);
}
static void openCLDestroyProgram(void *H) {
logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
"clReleaseProgram");
}
Expected<Program>
OpenCLPlatform::createProgramFromSource(Span<const char> Source,
int DeviceIndex) {
cl_int Error;
const char *CSource = Source.data();
size_t SourceSize = Source.size();
cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
&CSource, &SourceSize, &Error);
if (Error)
return getOpenCLError(Error, "clCreateProgramWithSource");
cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
if (cl_int Error =
clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
return getOpenCLError(Error, "clBuildProgram");
return constructProgram(this, Program, openCLDestroyProgram);
}
Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
int DeviceIndex) {
cl_int Result;
cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
ByteCount, nullptr, &Result);
if (Result)
return getOpenCLError(Result, "clCreateBuffer");
return reinterpret_cast<void *>(Memory);
}
static void openCLDestroyDeviceMemory(void *H) {
logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
"clReleaseMemObject");
}
HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
return openCLDestroyDeviceMemory;
}
void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
size_t ByteSize,
size_t ByteOffset) {
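  // A device memory span is represented as an OpenCL sub-buffer aliasing the
  // region [ByteOffset, ByteOffset + ByteSize) of the parent buffer; it is
  // released later through rawDestroyDeviceMemorySpanHandle.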
cl_int Error;
cl_buffer_region Region;
Region.origin = ByteOffset;
Region.size = ByteSize;
cl_mem SubBuffer =
clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
logOpenCLWarning(Error, "clCreateSubBuffer");
if (Error)
return nullptr;
return SubBuffer;
}
void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
openCLDestroyDeviceMemory(Handle);
}
Expected<void *>
OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
int /*DeviceIndex*/) {
// This doesn't seem to have any equivalent in OpenCL.
return Status("not implemented");
}
Expected<ptrdiff_t>
OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
int /*DeviceIndex*/) {
// This doesn't seem to have any equivalent in OpenCL.
return Status("not implemented");
}
static void noOpHandleDestructor(void *) {}
Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
ptrdiff_t /*ByteCount*/) {
// TODO(jhen): Do we want to do something to pin the memory here?
return Status();
}
HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
// TODO(jhen): Do we want to unpin the memory here?
return noOpHandleDestructor;
}
Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
// TODO(jhen): Do we want to do something to pin the memory here?
return std::malloc(ByteCount);
}
static void freeMemoryHandleDestructor(void *Memory) {
// TODO(jhen): Do we want to unpin the memory here?
std::free(Memory);
}
HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
return freeMemoryHandleDestructor;
}
Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
ptrdiff_t DeviceSrcByteOffset,
void *DeviceDst,
ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) {
return getOpenCLError(
clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
"clEnqueueCopyBuffer");
}
Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
ptrdiff_t DeviceSrcByteOffset,
void *HostDst, ptrdiff_t ByteCount,
void *Stream) {
return getOpenCLError(
clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
nullptr, nullptr),
"clEnqueueReadBuffer");
}
Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
ptrdiff_t DeviceDstByteOffset,
ptrdiff_t ByteCount, void *Stream) {
return getOpenCLError(
clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
static_cast<cl_mem>(DeviceDst), CL_TRUE,
DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
nullptr),
"clEnqueueWriteBuffer");
}
Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
ptrdiff_t ByteCount, char ByteValue,
void *Stream) {
return getOpenCLError(
clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
ByteOffset, ByteCount, 0, nullptr, nullptr),
"clEnqueueFillBuffer");
}
struct StreamCallbackUserData {
StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
cl_event EndEvent)
: TheStream(TheStream), TheFunction(std::move(Function)),
EndEvent(EndEvent) {}
Stream &TheStream;
StreamCallback TheFunction;
cl_event EndEvent;
};
// A function with the right signature to pass to clSetEventCallback.
void CL_CALLBACK openCLStreamCallbackShim(cl_event,
cl_int EventCommandExecStatus,
void *UserData) {
std::unique_ptr<StreamCallbackUserData> Data(
static_cast<StreamCallbackUserData *>(UserData));
Data->TheFunction(
Data->TheStream,
getOpenCLError(EventCommandExecStatus, "stream callback error state"));
if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
logOpenCLWarning(Result, "clSetUserEventStatus");
if (cl_int Result = clReleaseEvent(Data->EndEvent))
logOpenCLWarning(Result, "clReleaseEvent");
}
Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
StreamCallback Callback) {
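  // Callback choreography: two user events bracket the host callback. A
  // barrier that waits on StartEvent is enqueued first and the callback shim
  // is attached to that barrier, so it fires once the barrier starts running.
  // A second barrier waits on EndEvent, which the shim completes after
  // running the user's function, so later work on this stream cannot proceed
  // until the callback has finished.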
cl_int Result;
cl_event StartEvent =
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
if (Result)
return getOpenCLError(Result, "clCreateUserEvent");
cl_event EndEvent =
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
if (Result)
return getOpenCLError(Result, "clCreateUserEvent");
cl_event StartBarrierEvent;
if (cl_int Result = clEnqueueBarrierWithWaitList(
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
&StartEvent, &StartBarrierEvent))
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
if (cl_int Result = clEnqueueBarrierWithWaitList(
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
&EndEvent, nullptr))
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
std::unique_ptr<StreamCallbackUserData> UserData(
new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
if (cl_int Result =
clSetEventCallback(StartBarrierEvent, CL_RUNNING,
openCLStreamCallbackShim, UserData.release()))
return getOpenCLError(Result, "clSetEventCallback");
if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
return getOpenCLError(Result, "clSetUserEventStatus");
if (cl_int Result = clReleaseEvent(StartBarrierEvent))
return getOpenCLError(Result, "clReleaseEvent");
return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
}
Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
cl_event *CLEvent = static_cast<cl_event *>(Event);
cl_event OldEvent = *CLEvent;
cl_event NewEvent;
if (cl_int Result = clEnqueueMarkerWithWaitList(
static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
*CLEvent = NewEvent;
return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
}
bool OpenCLPlatform::eventIsDone(void *Event) {
cl_event *CLEvent = static_cast<cl_event *>(Event);
cl_int EventStatus;
logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
sizeof(EventStatus), &EventStatus, nullptr),
"clGetEventInfo");
return EventStatus == CL_COMPLETE || EventStatus < 0;
}
Status OpenCLPlatform::eventSync(void *Event) {
cl_event *CLEvent = static_cast<cl_event *>(Event);
return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
}
Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
void *EndEvent) {
cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
cl_ulong StartNanoseconds;
cl_ulong EndNanoseconds;
if (cl_int Result =
clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
&StartNanoseconds, nullptr))
return getOpenCLError(Result, "clGetEventProfilingInfo");
if (cl_int Result = clGetEventProfilingInfo(
*CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
return getOpenCLError(Result, "clGetEventProfilingInfo");
  return (EndNanoseconds - StartNanoseconds) * 1e-9; // Nanoseconds to seconds.
}
Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
const std::string &Name) {
cl_int Error;
cl_kernel Kernel =
clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
if (Error)
return getOpenCLError(Error, "clCreateKernel");
return Kernel;
}
static void openCLDestroyKernel(void *H) {
logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
"clReleaseKernel");
}
HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
return openCLDestroyKernel;
}
Status OpenCLPlatform::rawEnqueueKernelLaunch(
void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
Span<void *> Arguments, Span<size_t> ArgumentSizes,
size_t SharedMemoryBytes) {
if (SharedMemoryBytes != 0)
return Status("OpenCL kernel launches only accept zero for the shared "
"memory byte size");
cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
for (int I = 0; I < Arguments.size(); ++I)
if (cl_int Error =
clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
return getOpenCLError(Error, "clSetKernelArg");
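  // LaunchDimensions uses a CUDA-style grid/block convention; map it to
  // OpenCL by using the block size as the local work size and block * grid
  // as the global work size in each dimension.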
size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
LaunchDimensions.BlockZ};
size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
LaunchDimensions.BlockY * LaunchDimensions.GridY,
LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
return getOpenCLError(
clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
nullptr, nullptr),
"clEnqueueNDRangeKernel");
}
} // namespace
namespace opencl {
/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
/// Platform.
Expected<Platform *> getPlatform() {
static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
if (CreationResult.isError())
return CreationResult.getError();
else
return new OpenCLPlatform(CreationResult.takeValue());
}();
return MaybePlatform;
}
} // namespace opencl
} // namespace acxxel


@ -1,221 +0,0 @@
//===--- span.h - The span class ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef ACXXEL_SPAN_H
#define ACXXEL_SPAN_H
#include <array>
#include <cstddef>
#include <exception>
#include <iterator>
#include <type_traits>
namespace acxxel {
/// Value used to indicate slicing to the end of the span.
static constexpr std::ptrdiff_t dynamic_extent = -1; // NOLINT
class SpanBase {};
/// Implementation of the proposed C++17 std::span class.
///
/// Based on the paper:
/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0122r1.pdf
template <typename ElementType> class Span : public SpanBase {
public:
/// \name constants and types
/// \{
using element_type = ElementType;
using index_type = std::ptrdiff_t;
using pointer = element_type *;
using reference = element_type &;
using iterator = element_type *;
using const_iterator = const element_type *;
using value_type = typename std::remove_const<element_type>::type;
/// \}
/// \name constructors, copy, assignment, and destructor.
/// \{
/// Constructs an empty span with null pointer data.
Span() : Data(nullptr), Size(0) {}
/// Constructs an empty span with null pointer data.
// Intentionally implicit.
Span(std::nullptr_t) : Data(nullptr), Size(0) {}
/// Constructs a span from a pointer and element count.
Span(pointer Ptr, index_type Count) : Data(Ptr), Size(Count) {
if (Count < 0 || (!Ptr && Count))
std::terminate();
}
/// Constructs a span from a pointer to the first element in the range and a
/// pointer to one past the last element in the range.
Span(pointer FirstElem, pointer LastElem)
: Data(FirstElem), Size(std::distance(FirstElem, LastElem)) {
if (Size < 0)
std::terminate();
}
/// Constructs a span from an array.
// Intentionally implicit.
template <typename T, size_t N> Span(T (&Arr)[N]) : Data(Arr), Size(N) {}
/// Constructs a span from a std::array.
// Intentionally implicit.
template <size_t N>
Span(const std::array<typename std::remove_const<element_type>::type, N> &Arr)
: Data(Arr.data()), Size(N) {}
/// Constructs a span from a container such as a std::vector.
// TODO(jhen): Put in a check to make sure this constructor does not
// participate in overload resolution unless Container meets the following
// requirements:
// * Container is a contiguous container and a sequence container.
// Intentionally implicit.
template <typename Container>
Span(Container &Cont,
typename std::enable_if<
std::is_same<
typename std::remove_const<typename Container::value_type>::type,
typename std::remove_const<element_type>::type>::value &&
!std::is_array<Container>::value &&
!std::is_base_of<SpanBase, Container>::value &&
std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
nullptr)
: Data(Cont.data()), Size(Cont.size()) {}
/// Avoids creating spans from expiring temporary objects.
// TODO(jhen): Put in a check to make sure this constructor does not
// participate in overload resolution unless Container meets the following
// requirements:
// * Container is a contiguous container and a sequence container.
template <typename Container>
Span(Container &&Cont,
typename std::enable_if<
std::is_same<
typename std::remove_const<typename Container::value_type>::type,
typename std::remove_const<element_type>::type>::value &&
!std::is_array<Container>::value &&
!std::is_base_of<SpanBase, Container>::value &&
std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
nullptr) = delete;
Span(const Span &) noexcept = default;
Span(Span &&) noexcept;
/// Constructs a span from copying a span of another type that can be
/// implicitly converted to the type stored by the constructed span.
// Intentionally implicit.
template <typename OtherElementType>
Span(const Span<OtherElementType> &Other)
: Data(Other.Data), Size(Other.Size) {}
/// Constructs a span from moving a span of another type that can be
/// implicitly converted to the type stored by the constructed span.
// Intentionally implicit.
template <typename OtherElementType>
Span(Span<OtherElementType> &&Other) : Data(Other.Data), Size(Other.Size) {}
~Span() = default;
Span &operator=(const Span &) noexcept = default;
Span &operator=(Span &&) noexcept;
/// \}
/// \name subviews
/// \{
/// Creates a span out of the first Count elements of this span.
Span<element_type> first(index_type Count) const {
bool Valid = Count >= 0 && Count <= size();
if (!Valid)
std::terminate();
return Span<element_type>(data(), Count);
}
/// Creates a span out of the last Count elements of this span.
Span<element_type> last(index_type Count) const {
bool Valid = Count >= 0 && Count <= size();
if (!Valid)
std::terminate();
return Span<element_type>(Count == 0 ? data() : data() + (size() - Count),
Count);
}
/// Creates a span out of the Count elements of this span beginning at Offset.
///
/// If no argument is provided for Count, the new span will extend to the end
/// of the current span.
Span<element_type> subspan(index_type Offset,
index_type Count = dynamic_extent) const {
bool Valid =
(Offset == 0 || (Offset > 0 && Offset <= size())) &&
(Count == dynamic_extent || (Count >= 0 && Offset + Count <= size()));
if (!Valid)
std::terminate();
return Span<element_type>(
data() + Offset, Count == dynamic_extent ? size() - Offset : Count);
}
/// \}
/// \name observers
/// \{
index_type length() const { return Size; }
index_type size() const { return Size; }
bool empty() const { return size() == 0; }
/// \}
/// \name element access
/// \{
reference operator[](index_type Idx) const {
bool Valid = Idx >= 0 && Idx < size();
if (!Valid)
std::terminate();
return Data[Idx];
}
reference operator()(index_type Idx) const { return operator[](Idx); }
pointer data() const noexcept { return Data; }
/// \}
/// \name iterator support
/// \{
iterator begin() const noexcept { return Data; }
iterator end() const noexcept { return Data + Size; }
const_iterator cbegin() const noexcept { return Data; }
const_iterator cend() const noexcept { return Data + Size; }
/// \}
private:
template <typename OtherElementType> friend class Span;
pointer Data;
index_type Size;
};
template <typename ElementType>
Span<ElementType>::Span(Span &&) noexcept = default;
template <typename ElementType>
Span<ElementType> &Span<ElementType>::operator=(Span &&) noexcept = default;
} // namespace acxxel
#endif // ACXXEL_SPAN_H
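For quick reference, a minimal usage sketch of the Span class above. It is illustrative only and not part of the original sources; sumFirstTwo and the asserted values are hypothetical.

// span_example.cpp - illustrative sketch of acxxel::Span usage (assumes the
// span.h header defined above and a C++11 compiler).
#include "span.h"
#include <cassert>
#include <vector>

// Hypothetical helper: sums the first two elements. Span::first() terminates
// the program if fewer than two elements are available.
static int sumFirstTwo(acxxel::Span<const int> S) {
  acxxel::Span<const int> Head = S.first(2);
  return Head[0] + Head[1];
}

int main() {
  int Array[] = {1, 2, 3, 4};
  std::vector<int> Vector = {5, 6, 7};
  // Spans convert implicitly from arrays and contiguous containers.
  assert(sumFirstTwo(Array) == 3);
  assert(sumFirstTwo(Vector) == 11);
  // subspan(Offset) with the default Count slices to the end of the span.
  acxxel::Span<int> Tail = acxxel::Span<int>(Array).subspan(2);
  assert(Tail.size() == 2 && Tail[0] == 3);
  return 0;
}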


@ -1,235 +0,0 @@
//===--- status.h - Status and Expected classes -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef ACXXEL_STATUS_H
#define ACXXEL_STATUS_H
#include <cassert>
#include <string>
// The clang compiler supports annotating class declarations with the
// warn_unused_result attribute. The effect is that any function returning
// that type is treated as if the function itself were marked
// warn_unused_result.
//
// The gcc compiler does not support warn_unused_result for classes, only for
// functions, so we only use this feature with clang.
#ifdef __clang__
#define ACXXEL_WARN_UNUSED_RESULT_TYPE __attribute__((warn_unused_result))
#else
#define ACXXEL_WARN_UNUSED_RESULT_TYPE
#endif
namespace acxxel {
/// Status type.
///
/// May represent failure with a string error message, or may indicate success.
class ACXXEL_WARN_UNUSED_RESULT_TYPE Status {
public:
/// Creates a Status representing success.
Status() : HasMessage(false) {}
/// Creates a Status representing failure with the given error message.
explicit Status(const std::string &Message)
: HasMessage(true), Message(Message) {}
Status(const Status &) = default;
Status &operator=(const Status &) = default;
Status(Status &&) noexcept = default;
// Cannot use default because the move assignment operator for std::string is
// not marked noexcept.
Status &operator=(Status &&That) noexcept {
HasMessage = That.HasMessage;
Message = std::move(That.Message);
return *this;
}
~Status() = default;
/// Returns true if this Status represents failure. Otherwise, returns false.
bool isError() const { return HasMessage; }
/// Returns true if this Status represents success. Otherwise, returns false.
operator bool() const { return !HasMessage; }
/// Gets a reference to the error message for this Status.
///
/// Should only be called if isError() returns true.
const std::string &getMessage() const { return Message; }
private:
bool HasMessage;
std::string Message;
};
class ExpectedBase {
protected:
enum class State {
SUCCESS,
FAILURE,
MOVED,
};
};
/// Either a value of type T or a Status representing failure.
template <typename T> class Expected : public ExpectedBase {
public:
/// Creates an Expected representing failure with the given Error status.
// Intentionally implicit.
Expected(Status AnError)
: TheState(State::FAILURE), TheError(std::move(AnError)) {
    assert(TheError.isError() && "constructing an error Expected value from a "
                                 "success status is not allowed");
}
/// Creates an Expected representing success with the given value.
// Intentionally implicit.
Expected(T Value) : TheState(State::SUCCESS), TheValue(std::move(Value)) {}
Expected(const Expected &That) : TheState(That.TheState) {
switch (TheState) {
case State::SUCCESS:
new (&TheValue) T(That.TheValue);
break;
case State::FAILURE:
new (&TheError) Status(That.TheError);
break;
case State::MOVED:
// Nothing to do in this case.
break;
}
}
  Expected &operator=(Expected That) {
    // Destroy the currently held value or error before constructing the new
    // one in place; otherwise the previous object would never be destroyed.
    switch (TheState) {
    case State::SUCCESS:
      TheValue.~T();
      break;
    case State::FAILURE:
      TheError.~Status();
      break;
    case State::MOVED:
      break;
    }
    TheState = That.TheState;
    switch (TheState) {
    case State::SUCCESS:
      new (&TheValue) T(std::move(That.TheValue));
      break;
    case State::FAILURE:
      new (&TheError) Status(std::move(That.TheError));
      break;
    case State::MOVED:
      // Nothing to do in this case.
      break;
    }
    return *this;
  }
Expected(Expected &&That) noexcept : TheState(That.TheState) {
switch (TheState) {
case State::SUCCESS:
new (&TheValue) T(std::move(That.TheValue));
break;
case State::FAILURE:
new (&TheError) Status(std::move(That.TheError));
break;
case State::MOVED:
// Nothing to do in this case.
break;
}
That.TheState = State::MOVED;
}
template <typename U>
Expected(const Expected<U> &That) : TheState(That.TheState) {
switch (TheState) {
case State::SUCCESS:
new (&TheValue) T(That.TheValue);
break;
case State::FAILURE:
new (&TheError) Status(That.TheError);
break;
case State::MOVED:
// Nothing to do in this case.
break;
}
}
template <typename U> Expected(Expected<U> &&That) : TheState(That.TheState) {
switch (TheState) {
case State::SUCCESS:
new (&TheValue) T(std::move(That.TheValue));
break;
case State::FAILURE:
new (&TheError) Status(std::move(That.TheError));
break;
case State::MOVED:
// Nothing to do in this case.
break;
}
}
~Expected() {
switch (TheState) {
case State::SUCCESS:
TheValue.~T();
break;
case State::FAILURE:
TheError.~Status();
break;
case State::MOVED:
// Nothing to do for this case.
break;
}
}
/// Returns true if this instance represents failure.
bool isError() const { return TheState != State::SUCCESS; }
/// Gets a reference to the Status object.
///
/// Should only be called if isError() returns true.
const Status &getError() const {
assert(isError());
return TheError;
}
/// Gets a const reference to the value object.
///
/// Should only be called if isError() returns false.
const T &getValue() const {
assert(!isError());
return TheValue;
}
/// Gets a reference to the value object.
///
/// Should only be called if isError() returns false.
T &getValue() {
assert(!isError());
return TheValue;
}
/// Takes the value from this object by moving it to the return value.
///
/// Should only be called if isError() returns false.
T takeValue() {
assert(!isError());
TheState = State::MOVED;
return std::move(TheValue);
}
private:
template <typename U> friend class Expected;
State TheState;
union {
T TheValue;
Status TheError;
};
};
} // namespace acxxel
#endif // ACXXEL_STATUS_H
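For quick reference, a minimal usage sketch of Status and Expected. It is illustrative only and not part of the original sources; parseEven is a hypothetical helper.

// status_example.cpp - illustrative sketch of acxxel::Status / Expected usage
// (assumes the status.h header defined above).
#include "status.h"
#include <cstdio>

// Hypothetical helper: returns the value on success or an error Status for
// odd inputs.
static acxxel::Expected<int> parseEven(int Value) {
  if (Value % 2 != 0)
    return acxxel::Status("value is not even");
  return Value;
}

int main() {
  acxxel::Expected<int> Good = parseEven(4);
  if (!Good.isError())
    std::printf("got %d\n", Good.getValue());
  acxxel::Expected<int> Bad = parseEven(3);
  if (Bad.isError())
    std::printf("error: %s\n", Bad.getError().getMessage().c_str());
  return 0;
}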


@ -1,41 +0,0 @@
add_executable(acxxel_test acxxel_test.cpp)
target_link_libraries(
acxxel_test
acxxel
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(AcxxelTest acxxel_test)
add_executable(span_test span_test.cpp)
target_link_libraries(
span_test
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(SpanTest span_test)
add_executable(status_test status_test.cpp)
target_link_libraries(
status_test
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(StatusTest status_test)
if(ACXXEL_ENABLE_OPENCL)
add_executable(opencl_test opencl_test.cpp)
target_link_libraries(
opencl_test
acxxel
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(OpenCLTest opencl_test)
endif()
if(ACXXEL_ENABLE_MULTI_DEVICE_UNIT_TESTS)
add_executable(multi_device_test multi_device_test.cpp)
target_link_libraries(
multi_device_test
acxxel
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
add_test(MultiDeviceTest multi_device_test)
endif()


@ -1,419 +0,0 @@
//===--- acxxel_test.cpp - Tests for the Acxxel API -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include "config.h"
#include "gtest/gtest.h"
#include <array>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <memory>
#include <mutex>
#include <thread>
namespace {
template <typename T, size_t N> constexpr size_t arraySize(T (&)[N]) {
return N;
}
using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
class AcxxelTest : public ::testing::TestWithParam<PlatformGetter> {};
TEST_P(AcxxelTest, GetDeviceCount) {
acxxel::Platform *Platform = GetParam()().takeValue();
int DeviceCount = Platform->getDeviceCount().getValue();
EXPECT_GE(DeviceCount, 0);
}
// Tests all the methods of a DeviceMemorySpan that was created from the asSpan
// method of a DeviceMemory object.
//
// The length is the number of elements in the span. The ElementByteSize is the
// number of bytes per element in the span.
//
// It is assumed that the input span has 10 or more elements.
template <typename SpanType>
void testFullDeviceMemorySpan(SpanType &&Span, ptrdiff_t Length,
ptrdiff_t ElementByteSize) {
EXPECT_GE(Length, 10);
EXPECT_GT(ElementByteSize, 0);
// Full span
EXPECT_EQ(Length, Span.length());
EXPECT_EQ(Length, Span.size());
EXPECT_EQ(Length * ElementByteSize, Span.byte_size());
EXPECT_EQ(0, Span.offset());
EXPECT_EQ(0, Span.byte_offset());
EXPECT_FALSE(Span.empty());
// Sub-span with first method.
auto First2 = Span.first(2);
EXPECT_EQ(2, First2.length());
EXPECT_EQ(2, First2.size());
EXPECT_EQ(2 * ElementByteSize, First2.byte_size());
EXPECT_EQ(0, First2.offset());
EXPECT_EQ(0, First2.byte_offset());
EXPECT_FALSE(First2.empty());
auto First0 = Span.first(0);
EXPECT_EQ(0, First0.length());
EXPECT_EQ(0, First0.size());
EXPECT_EQ(0, First0.byte_size());
EXPECT_EQ(0, First0.offset());
EXPECT_EQ(0, First0.byte_offset());
EXPECT_TRUE(First0.empty());
// Sub-span with last method.
auto Last2 = Span.last(2);
EXPECT_EQ(2, Last2.length());
EXPECT_EQ(2, Last2.size());
EXPECT_EQ(2 * ElementByteSize, Last2.byte_size());
EXPECT_EQ(Length - 2, Last2.offset());
EXPECT_EQ((Length - 2) * ElementByteSize, Last2.byte_offset());
EXPECT_FALSE(Last2.empty());
auto Last0 = Span.last(0);
EXPECT_EQ(0, Last0.length());
EXPECT_EQ(0, Last0.size());
EXPECT_EQ(0, Last0.byte_size());
EXPECT_EQ(Length, Last0.offset());
EXPECT_EQ(Length * ElementByteSize, Last0.byte_offset());
EXPECT_TRUE(Last0.empty());
// Sub-span with subspan method.
auto Middle2 = Span.subspan(4, 2);
EXPECT_EQ(2, Middle2.length());
EXPECT_EQ(2, Middle2.size());
EXPECT_EQ(2 * ElementByteSize, Middle2.byte_size());
EXPECT_EQ(4, Middle2.offset());
EXPECT_EQ(4 * ElementByteSize, Middle2.byte_offset());
EXPECT_FALSE(Middle2.empty());
auto Middle0 = Span.subspan(4, 0);
EXPECT_EQ(0, Middle0.length());
EXPECT_EQ(0, Middle0.size());
EXPECT_EQ(0, Middle0.byte_size());
EXPECT_EQ(4, Middle0.offset());
EXPECT_EQ(4 * ElementByteSize, Middle0.byte_offset());
EXPECT_TRUE(Middle0.empty());
auto Subspan2AtStart = Span.subspan(0, 2);
EXPECT_EQ(2, Subspan2AtStart.length());
EXPECT_EQ(2, Subspan2AtStart.size());
EXPECT_EQ(2 * ElementByteSize, Subspan2AtStart.byte_size());
EXPECT_EQ(0, Subspan2AtStart.offset());
EXPECT_EQ(0, Subspan2AtStart.byte_offset());
EXPECT_FALSE(Subspan2AtStart.empty());
auto Subspan2AtEnd = Span.subspan(Length - 2, 2);
EXPECT_EQ(2, Subspan2AtEnd.length());
EXPECT_EQ(2, Subspan2AtEnd.size());
EXPECT_EQ(2 * ElementByteSize, Subspan2AtEnd.byte_size());
EXPECT_EQ(Length - 2, Subspan2AtEnd.offset());
EXPECT_EQ((Length - 2) * ElementByteSize, Subspan2AtEnd.byte_offset());
EXPECT_FALSE(Subspan2AtEnd.empty());
auto Subspan0AtStart = Span.subspan(0, 0);
EXPECT_EQ(0, Subspan0AtStart.length());
EXPECT_EQ(0, Subspan0AtStart.size());
EXPECT_EQ(0, Subspan0AtStart.byte_size());
EXPECT_EQ(0, Subspan0AtStart.offset());
EXPECT_EQ(0, Subspan0AtStart.byte_offset());
EXPECT_TRUE(Subspan0AtStart.empty());
auto Subspan0AtEnd = Span.subspan(Length, 0);
EXPECT_EQ(0, Subspan0AtEnd.length());
EXPECT_EQ(0, Subspan0AtEnd.size());
EXPECT_EQ(0, Subspan0AtEnd.byte_size());
EXPECT_EQ(Length, Subspan0AtEnd.offset());
EXPECT_EQ(Length * ElementByteSize, Subspan0AtEnd.byte_offset());
EXPECT_TRUE(Subspan0AtEnd.empty());
}
TEST_P(AcxxelTest, DeviceMemory) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Expected<acxxel::DeviceMemory<int>> MaybeMemory =
Platform->mallocD<int>(10);
EXPECT_FALSE(MaybeMemory.isError());
// ref
acxxel::DeviceMemory<int> &MemoryRef = MaybeMemory.getValue();
EXPECT_EQ(10, MemoryRef.length());
EXPECT_EQ(10, MemoryRef.size());
EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(MemoryRef.byte_size()));
EXPECT_FALSE(MemoryRef.empty());
// mutable span
acxxel::DeviceMemorySpan<int> MutableSpan = MemoryRef.asSpan();
testFullDeviceMemorySpan(MutableSpan, 10, sizeof(int));
// const ref
const acxxel::DeviceMemory<int> &ConstMemoryRef = MaybeMemory.getValue();
EXPECT_EQ(10, ConstMemoryRef.length());
EXPECT_EQ(10, ConstMemoryRef.size());
EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(ConstMemoryRef.byte_size()));
EXPECT_FALSE(ConstMemoryRef.empty());
// immutable span
acxxel::DeviceMemorySpan<const int> ImmutableSpan = ConstMemoryRef.asSpan();
testFullDeviceMemorySpan(ImmutableSpan, 10, sizeof(int));
}
TEST_P(AcxxelTest, CopyHostAndDevice) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
int A[] = {0, 1, 2};
std::array<int, arraySize(A)> B;
acxxel::DeviceMemory<int> X =
Platform->mallocD<int>(arraySize(A)).takeValue();
Stream.syncCopyHToD(A, X);
Stream.syncCopyDToH(X, B);
for (size_t I = 0; I < arraySize(A); ++I)
EXPECT_EQ(A[I], B[I]);
EXPECT_FALSE(Stream.takeStatus().isError());
}
TEST_P(AcxxelTest, CopyDToD) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
int A[] = {0, 1, 2};
std::array<int, arraySize(A)> B;
acxxel::DeviceMemory<int> X =
Platform->mallocD<int>(arraySize(A)).takeValue();
acxxel::DeviceMemory<int> Y =
Platform->mallocD<int>(arraySize(A)).takeValue();
Stream.syncCopyHToD(A, X);
Stream.syncCopyDToD(X, Y);
Stream.syncCopyDToH(Y, B);
for (size_t I = 0; I < arraySize(A); ++I)
EXPECT_EQ(A[I], B[I]);
EXPECT_FALSE(Stream.takeStatus().isError());
}
TEST_P(AcxxelTest, AsyncCopyHostAndDevice) {
acxxel::Platform *Platform = GetParam()().takeValue();
int A[] = {0, 1, 2};
std::array<int, arraySize(A)> B;
acxxel::DeviceMemory<int> X =
Platform->mallocD<int>(arraySize(A)).takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
acxxel::AsyncHostMemory<int> AsyncA =
Platform->registerHostMem(A).takeValue();
acxxel::AsyncHostMemory<int> AsyncB =
Platform->registerHostMem(B).takeValue();
EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncB).takeStatus().isError());
EXPECT_FALSE(Stream.sync().isError());
for (size_t I = 0; I < arraySize(A); ++I)
EXPECT_EQ(A[I], B[I]);
}
TEST_P(AcxxelTest, AsyncMemsetD) {
acxxel::Platform *Platform = GetParam()().takeValue();
constexpr size_t ArrayLength = 10;
std::array<uint32_t, ArrayLength> Host;
acxxel::DeviceMemory<uint32_t> X =
Platform->mallocD<uint32_t>(ArrayLength).takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
acxxel::AsyncHostMemory<uint32_t> AsyncHost =
Platform->registerHostMem(Host).takeValue();
EXPECT_FALSE(Stream.asyncMemsetD(X, 0x12).takeStatus().isError());
EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncHost).takeStatus().isError());
EXPECT_FALSE(Stream.sync().isError());
for (size_t I = 0; I < ArrayLength; ++I)
EXPECT_EQ(0x12121212u, Host[I]);
}
TEST_P(AcxxelTest, RegisterHostMem) {
acxxel::Platform *Platform = GetParam()().takeValue();
auto Data = std::unique_ptr<int[]>(new int[3]);
acxxel::Expected<acxxel::AsyncHostMemory<const int>> MaybeAsyncHostMemory =
Platform->registerHostMem<int>({Data.get(), 3});
EXPECT_FALSE(MaybeAsyncHostMemory.isError())
<< MaybeAsyncHostMemory.getError().getMessage();
acxxel::AsyncHostMemory<const int> AsyncHostMemory =
MaybeAsyncHostMemory.takeValue();
EXPECT_EQ(Data.get(), AsyncHostMemory.data());
EXPECT_EQ(3, AsyncHostMemory.size());
}
struct RefCounter {
static int Count;
RefCounter() { ++Count; }
~RefCounter() { --Count; }
RefCounter(const RefCounter &) = delete;
RefCounter &operator=(const RefCounter &) = delete;
};
int RefCounter::Count;
TEST_P(AcxxelTest, OwnedAsyncHost) {
acxxel::Platform *Platform = GetParam()().takeValue();
RefCounter::Count = 0;
{
acxxel::OwnedAsyncHostMemory<RefCounter> A =
Platform->newAsyncHostMem<RefCounter>(3).takeValue();
EXPECT_EQ(3, RefCounter::Count);
}
EXPECT_EQ(0, RefCounter::Count);
}
TEST_P(AcxxelTest, OwnedAsyncCopyHostAndDevice) {
acxxel::Platform *Platform = GetParam()().takeValue();
size_t Length = 3;
acxxel::OwnedAsyncHostMemory<int> A =
Platform->newAsyncHostMem<int>(Length).takeValue();
for (size_t I = 0; I < Length; ++I)
A[I] = I;
acxxel::OwnedAsyncHostMemory<int> B =
Platform->newAsyncHostMem<int>(Length).takeValue();
acxxel::DeviceMemory<int> X = Platform->mallocD<int>(Length).takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
EXPECT_FALSE(Stream.asyncCopyHToD(A, X).takeStatus().isError());
EXPECT_FALSE(Stream.asyncCopyDToH(X, B).takeStatus().isError());
EXPECT_FALSE(Stream.sync().isError());
for (size_t I = 0; I < Length; ++I)
EXPECT_EQ(A[I], B[I]);
}
TEST_P(AcxxelTest, AsyncCopyDToD) {
acxxel::Platform *Platform = GetParam()().takeValue();
int A[] = {0, 1, 2};
std::array<int, arraySize(A)> B;
acxxel::DeviceMemory<int> X =
Platform->mallocD<int>(arraySize(A)).takeValue();
acxxel::DeviceMemory<int> Y =
Platform->mallocD<int>(arraySize(A)).takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
acxxel::AsyncHostMemory<int> AsyncA =
Platform->registerHostMem(A).takeValue();
acxxel::AsyncHostMemory<int> AsyncB =
Platform->registerHostMem(B).takeValue();
EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
EXPECT_FALSE(Stream.asyncCopyDToD(X, Y).takeStatus().isError());
EXPECT_FALSE(Stream.asyncCopyDToH(Y, AsyncB).takeStatus().isError());
EXPECT_FALSE(Stream.sync().isError());
for (size_t I = 0; I < arraySize(A); ++I)
EXPECT_EQ(A[I], B[I]);
}
TEST_P(AcxxelTest, Stream) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
EXPECT_FALSE(Stream.sync().isError());
}
TEST_P(AcxxelTest, Event) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Event Event = Platform->createEvent().takeValue();
EXPECT_TRUE(Event.isDone());
EXPECT_FALSE(Event.sync().isError());
}
TEST_P(AcxxelTest, RecordEventsInAStream) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Stream Stream = Platform->createStream().takeValue();
acxxel::Event Start = Platform->createEvent().takeValue();
acxxel::Event End = Platform->createEvent().takeValue();
EXPECT_FALSE(Stream.enqueueEvent(Start).takeStatus().isError());
EXPECT_FALSE(Start.sync().isError());
std::this_thread::sleep_for(std::chrono::milliseconds(10));
EXPECT_FALSE(Stream.enqueueEvent(End).takeStatus().isError());
EXPECT_FALSE(End.sync().isError());
EXPECT_GT(End.getSecondsSince(Start).takeValue(), 0);
}
TEST_P(AcxxelTest, StreamCallback) {
acxxel::Platform *Platform = GetParam()().takeValue();
int Value = 0;
acxxel::Stream Stream = Platform->createStream().takeValue();
EXPECT_FALSE(
Stream
.addCallback([&Value](acxxel::Stream &, const acxxel::Status &) {
Value = 42;
})
.takeStatus()
.isError());
EXPECT_FALSE(Stream.sync().isError());
EXPECT_EQ(42, Value);
}
TEST_P(AcxxelTest, WaitForEventsInAStream) {
acxxel::Platform *Platform = GetParam()().takeValue();
acxxel::Stream Stream0 = Platform->createStream().takeValue();
acxxel::Stream Stream1 = Platform->createStream().takeValue();
acxxel::Event Event0 = Platform->createEvent().takeValue();
acxxel::Event Event1 = Platform->createEvent().takeValue();
  // A callback on Stream0 waits until someone sets the GoFlag, then sets the
  // MarkerFlag.
std::mutex Mutex;
std::condition_variable ConditionVar;
bool GoFlag = false;
bool MarkerFlag = false;
EXPECT_FALSE(Stream0
.addCallback([&Mutex, &ConditionVar, &GoFlag, &MarkerFlag](
acxxel::Stream &, const acxxel::Status &) {
std::unique_lock<std::mutex> Lock(Mutex);
ConditionVar.wait(Lock,
[&GoFlag] { return GoFlag == true; });
MarkerFlag = true;
})
.takeStatus()
.isError());
// Event0 can only occur after GoFlag and MarkerFlag are set.
EXPECT_FALSE(Stream0.enqueueEvent(Event0).takeStatus().isError());
// Use waitOnEvent to make a callback on Stream1 wait for an event on Stream0.
EXPECT_FALSE(Stream1.waitOnEvent(Event0).isError());
EXPECT_FALSE(Stream1.enqueueEvent(Event1).takeStatus().isError());
EXPECT_FALSE(Stream1
.addCallback([&Mutex, &MarkerFlag](acxxel::Stream &,
const acxxel::Status &) {
std::unique_lock<std::mutex> Lock(Mutex);
// This makes sure that this callback runs after the
// callback on Stream0.
EXPECT_TRUE(MarkerFlag);
})
.takeStatus()
.isError());
// Allow the callback on Stream0 to set MarkerFlag and finish.
{
std::unique_lock<std::mutex> Lock(Mutex);
GoFlag = true;
}
ConditionVar.notify_one();
// Make sure the events have finished and that Event1 did not happen before
// Event0.
EXPECT_FALSE(Event0.sync().isError());
EXPECT_FALSE(Event1.sync().isError());
EXPECT_FALSE(Stream1.sync().isError());
}
#if defined(ACXXEL_ENABLE_CUDA) || defined(ACXXEL_ENABLE_OPENCL)
INSTANTIATE_TEST_CASE_P(BothPlatformTest, AcxxelTest,
::testing::Values(
#ifdef ACXXEL_ENABLE_CUDA
acxxel::getCUDAPlatform
#ifdef ACXXEL_ENABLE_OPENCL
,
#endif
#endif
#ifdef ACXXEL_ENABLE_OPENCL
acxxel::getOpenCLPlatform
#endif
));
#endif
} // namespace


@ -1,87 +0,0 @@
#include "acxxel.h"
#include "config.h"
#include "gtest/gtest.h"
namespace {
using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
class MultiDeviceTest : public ::testing::TestWithParam<PlatformGetter> {};
TEST_P(MultiDeviceTest, AsyncCopy) {
acxxel::Platform *Platform = GetParam()().takeValue();
int DeviceCount = Platform->getDeviceCount().getValue();
EXPECT_GT(DeviceCount, 0);
int Length = 3;
auto A = std::unique_ptr<int[]>(new int[Length]);
auto B0 = std::unique_ptr<int[]>(new int[Length]);
auto B1 = std::unique_ptr<int[]>(new int[Length]);
auto ASpan = acxxel::Span<int>(A.get(), Length);
auto B0Span = acxxel::Span<int>(B0.get(), Length);
auto B1Span = acxxel::Span<int>(B1.get(), Length);
for (int I = 0; I < Length; ++I)
A[I] = I;
auto AsyncA = Platform->registerHostMem(ASpan).takeValue();
auto AsyncB0 = Platform->registerHostMem(B0Span).takeValue();
auto AsyncB1 = Platform->registerHostMem(B1Span).takeValue();
acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
auto Device0 = Platform->mallocD<int>(Length, 0).takeValue();
auto Device1 = Platform->mallocD<int>(Length, 1).takeValue();
EXPECT_FALSE(Stream0.asyncCopyHToD(AsyncA, Device0, Length)
.asyncCopyDToH(Device0, AsyncB0, Length)
.sync()
.isError());
EXPECT_FALSE(Stream1.asyncCopyHToD(AsyncA, Device1, Length)
.asyncCopyDToH(Device1, AsyncB1, Length)
.sync()
.isError());
for (int I = 0; I < Length; ++I) {
EXPECT_EQ(B0[I], I);
EXPECT_EQ(B1[I], I);
}
}
TEST_P(MultiDeviceTest, Events) {
acxxel::Platform *Platform = GetParam()().takeValue();
int DeviceCount = Platform->getDeviceCount().getValue();
EXPECT_GT(DeviceCount, 0);
acxxel::Stream Stream0 = Platform->createStream(0).takeValue();
acxxel::Stream Stream1 = Platform->createStream(1).takeValue();
acxxel::Event Event0 = Platform->createEvent(0).takeValue();
acxxel::Event Event1 = Platform->createEvent(1).takeValue();
EXPECT_FALSE(Stream0.enqueueEvent(Event0).sync().isError());
EXPECT_FALSE(Stream1.enqueueEvent(Event1).sync().isError());
EXPECT_TRUE(Event0.isDone());
EXPECT_TRUE(Event1.isDone());
EXPECT_FALSE(Event0.sync().isError());
EXPECT_FALSE(Event1.sync().isError());
}
#if defined(ACXXEL_ENABLE_CUDA) || defined(ACXXEL_ENABLE_OPENCL)
INSTANTIATE_TEST_CASE_P(BothPlatformTest, MultiDeviceTest,
::testing::Values(
#ifdef ACXXEL_ENABLE_CUDA
acxxel::getCUDAPlatform
#ifdef ACXXEL_ENABLE_OPENCL
,
#endif
#endif
#ifdef ACXXEL_ENABLE_OPENCL
acxxel::getOpenCLPlatform
#endif
));
#endif
} // namespace


@ -1,61 +0,0 @@
//===--- opencl_test.cpp - Tests for OpenCL and the Acxxel API ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "acxxel.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>
namespace {
static const char *SaxpyKernelSource = R"(
__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
int I = get_global_id(0);
if (I < N)
X[I] = A * X[I] + Y[I];
}
)";
TEST(OpenCL, Saxpy) {
constexpr size_t Length = 3;
float A = 2.f;
std::array<float, Length> X = {{0.f, 1.f, 2.f}};
std::array<float, Length> Y = {{3.f, 4.f, 5.f}};
std::array<float, Length> Expected = {{3.f, 6.f, 9.f}};
acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
acxxel::Stream Stream = OpenCL->createStream().takeValue();
auto DeviceX = OpenCL->mallocD<float>(Length).takeValue();
auto DeviceY = OpenCL->mallocD<float>(Length).takeValue();
Stream.syncCopyHToD(X, DeviceX);
Stream.syncCopyHToD(Y, DeviceY);
acxxel::Program Program =
OpenCL
->createProgramFromSource(acxxel::Span<const char>(
SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
.takeValue();
acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
float *RawX = static_cast<float *>(DeviceX);
float *RawY = static_cast<float *>(DeviceY);
int IntLength = Length;
void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
sizeof(int)};
EXPECT_FALSE(
Stream.asyncKernelLaunch(Kernel, Length, Arguments, ArgumentSizes)
.takeStatus()
.isError());
Stream.syncCopyDToH(DeviceX, X);
EXPECT_FALSE(Stream.sync().isError());
EXPECT_EQ(X, Expected);
}
} // namespace


@ -1,292 +0,0 @@
//===--- span_test.cpp - Tests for the span class -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "span.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <array>
#include <vector>
namespace {
template <typename T, size_t N> size_t arraySize(T (&)[N]) { return N; }
TEST(Span, NullConstruction) {
acxxel::Span<int> Span0;
EXPECT_EQ(nullptr, Span0.data());
EXPECT_EQ(0, Span0.size());
acxxel::Span<int> Span1(nullptr);
EXPECT_EQ(nullptr, Span1.data());
EXPECT_EQ(0, Span1.size());
}
TEST(Span, PtrSizeConstruction) {
int ZeroSize = 0;
acxxel::Span<int> Span0(nullptr, ZeroSize);
EXPECT_EQ(Span0.data(), nullptr);
EXPECT_EQ(Span0.size(), 0);
int Values[] = {0, 1, 2};
acxxel::Span<int> Span1(Values, arraySize(Values));
EXPECT_EQ(Span1.data(), Values);
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
acxxel::Span<int> Span2(Values, ZeroSize);
EXPECT_EQ(Span2.data(), Values);
EXPECT_EQ(Span2.size(), 0);
}
TEST(Span, PtrSizeConstruction_NegativeCount) {
int Values[] = {0, 1, 2};
EXPECT_DEATH(acxxel::Span<int> Span0(Values, -1), "terminate");
}
TEST(Span, PtrSizeConstruction_NullptrNonzeroSize) {
EXPECT_DEATH(acxxel::Span<int> Span0(nullptr, 1), "terminate");
}
TEST(Span, FirstLastConstruction) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span0(Values, Values);
EXPECT_EQ(Span0.data(), Values);
EXPECT_EQ(Span0.size(), 0);
acxxel::Span<int> Span(Values, Values + 2);
EXPECT_EQ(Span.data(), Values);
EXPECT_EQ(Span.size(), 2);
}
TEST(Span, FirstLastConstruction_LastBeforeFirst) {
int Values[] = {0, 1, 2};
EXPECT_DEATH(acxxel::Span<int> Span(Values + 2, Values), "terminate");
}
TEST(Span, ArrayConstruction) {
int Array[] = {0, 1, 2};
acxxel::Span<int> Span(Array);
EXPECT_EQ(Span.data(), Array);
EXPECT_EQ(Span.size(), 3);
}
TEST(Span, StdArrayConstruction) {
std::array<int, 3> Array{{0, 1, 2}};
acxxel::Span<int> Span(Array);
EXPECT_EQ(Span.data(), Array.data());
EXPECT_EQ(static_cast<size_t>(Span.size()), Array.size());
std::array<const int, 3> ConstArray{{0, 1, 2}};
acxxel::Span<const int> ConstSpan(ConstArray);
EXPECT_EQ(ConstSpan.data(), ConstArray.data());
EXPECT_EQ(static_cast<size_t>(ConstSpan.size()), ConstArray.size());
}
TEST(Span, ContainerConstruction) {
std::vector<int> Vector = {0, 1, 2};
acxxel::Span<int> Span(Vector);
EXPECT_EQ(Span.data(), &Vector[0]);
EXPECT_EQ(static_cast<size_t>(Span.size()), Vector.size());
}
TEST(Span, CopyConstruction) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span0(Values);
acxxel::Span<int> Span1(Span0);
EXPECT_EQ(Span1.data(), Values);
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
}
TEST(Span, CopyAssignment) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span0(Values);
acxxel::Span<int> Span1;
Span1 = Span0;
EXPECT_EQ(Span1.data(), Values);
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
}
TEST(Span, CopyConstFromNonConst) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span0(Values);
acxxel::Span<const int> Span1(Span0);
EXPECT_EQ(Span1.data(), Values);
EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
}
TEST(Span, FirstMethod) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
acxxel::Span<int> Span0 = Span.first(0);
acxxel::Span<int> Span1 = Span.first(1);
acxxel::Span<int> Span2 = Span.first(2);
acxxel::Span<int> Span3 = Span.first(3);
EXPECT_EQ(Span0.data(), Values);
EXPECT_EQ(Span1.data(), Values);
EXPECT_EQ(Span2.data(), Values);
EXPECT_EQ(Span3.data(), Values);
EXPECT_TRUE(Span0.empty());
EXPECT_THAT(Span1, ::testing::ElementsAre(0));
EXPECT_THAT(Span2, ::testing::ElementsAre(0, 1));
EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
}
TEST(Span, FirstMethod_IllegalArguments) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
EXPECT_DEATH(Span.first(-1), "terminate");
EXPECT_DEATH(Span.first(4), "terminate");
}
TEST(Span, LastMethod) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
acxxel::Span<int> Span0 = Span.last(0);
acxxel::Span<int> Span1 = Span.last(1);
acxxel::Span<int> Span2 = Span.last(2);
acxxel::Span<int> Span3 = Span.last(3);
EXPECT_EQ(Span0.data(), Values);
EXPECT_EQ(Span1.data(), Values + 2);
EXPECT_EQ(Span2.data(), Values + 1);
EXPECT_EQ(Span3.data(), Values);
EXPECT_TRUE(Span0.empty());
EXPECT_THAT(Span1, ::testing::ElementsAre(2));
EXPECT_THAT(Span2, ::testing::ElementsAre(1, 2));
EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
}
TEST(Span, LastMethod_IllegalArguments) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
EXPECT_DEATH(Span.last(-1), "terminate");
EXPECT_DEATH(Span.last(4), "terminate");
}
TEST(Span, SubspanMethod) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
acxxel::Span<int> Span0 = Span.subspan(0);
acxxel::Span<int> Span0e = Span.subspan(0, acxxel::dynamic_extent);
acxxel::Span<int> Span00 = Span.subspan(0, 0);
acxxel::Span<int> Span01 = Span.subspan(0, 1);
acxxel::Span<int> Span02 = Span.subspan(0, 2);
acxxel::Span<int> Span03 = Span.subspan(0, 3);
acxxel::Span<int> Span1 = Span.subspan(1);
acxxel::Span<int> Span1e = Span.subspan(1, acxxel::dynamic_extent);
acxxel::Span<int> Span10 = Span.subspan(1, 0);
acxxel::Span<int> Span11 = Span.subspan(1, 1);
acxxel::Span<int> Span12 = Span.subspan(1, 2);
acxxel::Span<int> Span2 = Span.subspan(2);
acxxel::Span<int> Span2e = Span.subspan(2, acxxel::dynamic_extent);
acxxel::Span<int> Span20 = Span.subspan(2, 0);
acxxel::Span<int> Span21 = Span.subspan(2, 1);
acxxel::Span<int> Span3 = Span.subspan(3);
acxxel::Span<int> Span3e = Span.subspan(3, acxxel::dynamic_extent);
acxxel::Span<int> Span30 = Span.subspan(3, 0);
EXPECT_EQ(Span0.data(), Values);
EXPECT_EQ(Span0e.data(), Values);
EXPECT_EQ(Span00.data(), Values);
EXPECT_EQ(Span01.data(), Values);
EXPECT_EQ(Span02.data(), Values);
EXPECT_EQ(Span03.data(), Values);
EXPECT_EQ(Span1.data(), Values + 1);
EXPECT_EQ(Span1e.data(), Values + 1);
EXPECT_EQ(Span10.data(), Values + 1);
EXPECT_EQ(Span11.data(), Values + 1);
EXPECT_EQ(Span12.data(), Values + 1);
EXPECT_EQ(Span2.data(), Values + 2);
EXPECT_EQ(Span2e.data(), Values + 2);
EXPECT_EQ(Span20.data(), Values + 2);
EXPECT_EQ(Span21.data(), Values + 2);
EXPECT_EQ(Span3.data(), Values + 3);
EXPECT_EQ(Span3e.data(), Values + 3);
EXPECT_EQ(Span30.data(), Values + 3);
EXPECT_TRUE(Span00.empty());
EXPECT_TRUE(Span10.empty());
EXPECT_TRUE(Span20.empty());
EXPECT_TRUE(Span30.empty());
EXPECT_THAT(Span0, ::testing::ElementsAre(0, 1, 2));
EXPECT_THAT(Span0e, ::testing::ElementsAre(0, 1, 2));
EXPECT_THAT(Span01, ::testing::ElementsAre(0));
EXPECT_THAT(Span02, ::testing::ElementsAre(0, 1));
EXPECT_THAT(Span03, ::testing::ElementsAre(0, 1, 2));
EXPECT_THAT(Span1, ::testing::ElementsAre(1, 2));
EXPECT_THAT(Span1e, ::testing::ElementsAre(1, 2));
EXPECT_THAT(Span11, ::testing::ElementsAre(1));
EXPECT_THAT(Span12, ::testing::ElementsAre(1, 2));
EXPECT_THAT(Span2, ::testing::ElementsAre(2));
EXPECT_THAT(Span2e, ::testing::ElementsAre(2));
EXPECT_THAT(Span21, ::testing::ElementsAre(2));
EXPECT_TRUE(Span3.empty());
EXPECT_TRUE(Span3e.empty());
}
TEST(Span, SubspanMethod_IllegalArguments) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
EXPECT_DEATH(Span.subspan(-1, 0), "terminate");
EXPECT_DEATH(Span.subspan(0, -2), "terminate");
EXPECT_DEATH(Span.subspan(0, 4), "terminate");
EXPECT_DEATH(Span.subspan(1, 3), "terminate");
EXPECT_DEATH(Span.subspan(2, 2), "terminate");
EXPECT_DEATH(Span.subspan(3, 1), "terminate");
EXPECT_DEATH(Span.subspan(4, 0), "terminate");
}
TEST(Span, ElementAccess) {
int Values[] = {0, 1, 2};
acxxel::Span<int> Span(Values);
EXPECT_EQ(&Span[0], Values);
EXPECT_EQ(&Span[1], Values + 1);
EXPECT_EQ(&Span[2], Values + 2);
EXPECT_EQ(&Span(0), Values);
EXPECT_EQ(&Span(1), Values + 1);
EXPECT_EQ(&Span(2), Values + 2);
Span[0] = 5;
EXPECT_EQ(Values[0], 5);
Span(0) = 0;
EXPECT_EQ(Values[0], 0);
const int ConstValues[] = {0, 1, 2};
acxxel::Span<const int> ConstSpan(ConstValues);
EXPECT_EQ(&ConstSpan[0], ConstValues);
EXPECT_EQ(&ConstSpan[1], ConstValues + 1);
EXPECT_EQ(&ConstSpan[2], ConstValues + 2);
EXPECT_EQ(&ConstSpan(0), ConstValues);
EXPECT_EQ(&ConstSpan(1), ConstValues + 1);
EXPECT_EQ(&ConstSpan(2), ConstValues + 2);
}
} // namespace


@ -1,55 +0,0 @@
//===--- status_test.cpp - Tests for the Status and Expected classes ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "status.h"
#include "gtest/gtest.h"
#include <memory>
namespace {
struct RefCounter {
static int Count;
RefCounter() { ++Count; }
~RefCounter() { --Count; }
RefCounter(const RefCounter &) = delete;
RefCounter &operator=(const RefCounter &) = delete;
};
int RefCounter::Count;
TEST(Expected, RefCounter) {
RefCounter::Count = 0;
using uptr = std::unique_ptr<RefCounter>;
acxxel::Expected<uptr> E0(uptr(new RefCounter));
EXPECT_FALSE(E0.isError());
EXPECT_EQ(1, RefCounter::Count);
acxxel::Expected<uptr> E1(std::move(E0));
EXPECT_FALSE(E1.isError());
EXPECT_EQ(1, RefCounter::Count);
acxxel::Expected<uptr> E2(acxxel::Status("nothing in here yet"));
EXPECT_TRUE(E2.isError());
EXPECT_EQ(1, RefCounter::Count);
E2 = std::move(E1);
EXPECT_FALSE(E2.isError());
EXPECT_EQ(1, RefCounter::Count);
EXPECT_EQ(1, E2.getValue()->Count);
EXPECT_FALSE(E2.isError());
EXPECT_EQ(1, RefCounter::Count);
EXPECT_EQ(1, E2.takeValue()->Count);
EXPECT_EQ(0, RefCounter::Count);
}
} // namespace