Summary: The changes in https://www.github.com/llvm/llvm-project/pull/185552 allowed us to start building the standard `libclang_rt.profile.a` for GPU targets. This PR expands on this by adding an optimized GPU routine for counter increment and removing the special-case handling of these functions in the OpenMP runtime. The vast majority of these functions are boilerplate, but we should be able to do more interesting things with this in the future, like value or memory profiling.
109 lines
4.8 KiB
CMake
109 lines
4.8 KiB
CMake
# Ensure the compiler is a valid clang when building the GPU target: its ID
# must match "[Cc]lang" and its version must be VERSION_EQUAL to the LLVM
# version assembled below. The check is skipped when LLVM_VERSION_MAJOR is
# unset or evaluates to false.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")

# The compiler version is quoted so that an empty value still forms a valid
# comparison and reaches the diagnostic below, rather than producing a
# malformed if() expression.
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
   "${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
  message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler "
                      "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
                      " is not 'Clang ${req_ver}'.")
endif()
# Translation units of the device runtime. They are listed by file name and
# then rewritten in place to absolute paths below src/ in this directory.
set(src_files
  Allocator.cpp
  Configuration.cpp
  Debug.cpp
  Kernel.cpp
  LibC.cpp
  Mapping.cpp
  Misc.cpp
  Parallelism.cpp
  Reduction.cpp
  State.cpp
  Synchronization.cpp
  Tasking.cpp
  DeviceUtils.cpp
  Workshare.cpp
)
list(TRANSFORM src_files PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/src/")
# Flags applied to every device runtime source, appended to whatever
# compile_options already holds.
list(APPEND compile_options
  -flto
  -fvisibility=hidden
  -nogpulib
  -nostdlibinc
  -fno-rtti
  -fno-exceptions
  -fconvergent-functions
  -Wno-unknown-cuda-version
)

# Pass the default target triple to the compiler when one is configured.
if(LLVM_DEFAULT_TARGET_TRIPLE)
  list(APPEND compile_options --target=${LLVM_DEFAULT_TARGET_TRIPLE})
endif()
# We disable the slp vectorizer during the runtime optimization to avoid
# vectorized accesses to the shared state. Generally, those are "good" but
# the optimizer pipeline (esp. Attributor) does not fully support vectorized
# instructions yet and we end up missing out on way more important constant
# propagation. That said, we will run the vectorizer again after the runtime
# has been linked into the user program.
list(APPEND compile_options "SHELL: -mllvm -vectorize-slp=false")

# Pick the name used in the output file (libomptarget-<target_name>.bc) and
# any target-specific flags. Either the default target triple or the compiler
# target may identify the GPU architecture.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
  set(target_name "amdgpu")
  list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
  set(target_name "nvptx")
  list(APPEND compile_options --cuda-feature=+ptx63)
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv" OR
       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv")
  set(target_name "spirv")
else()
  # Without a recognized architecture target_name would stay unset and the
  # output would be named "libomptarget-.bc"; fail early instead.
  message(FATAL_ERROR "Cannot build GPU device runtime. Unsupported target: "
                      "LLVM_DEFAULT_TARGET_TRIPLE='${LLVM_DEFAULT_TARGET_TRIPLE}', "
                      "CMAKE_CXX_COMPILER_TARGET='${CMAKE_CXX_COMPILER_TARGET}'. "
                      "Expected a triple starting with amdgcn, nvptx or spirv.")
endif()
# Trick to combine these into a bitcode file via the linker's LTO pass: the
# sources are built as an "executable" whose output file carries the name of
# the device bitcode library.
add_executable(libompdevice ${src_files})
set_target_properties(libompdevice PROPERTIES
  LINKER_LANGUAGE CXX
  RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc
  RUNTIME_OUTPUT_DIRECTORY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}"
  # Both run-time search path properties are explicitly emptied.
  BUILD_RPATH ""
  INSTALL_RPATH ""
)
# If the user built with the GPU C library enabled we will use that instead;
# the presence of a `libc` target is what signals this.
if(TARGET libc)
  target_compile_definitions(libompdevice
    PRIVATE
      OMPTARGET_HAS_LIBC
  )
endif()
target_compile_definitions(libompdevice
  PRIVATE
    SHARED_SCRATCHPAD_SIZE=512
)

# Header search paths: this directory's include/, plus the libc and
# offload/include directories two levels up.
target_include_directories(libompdevice
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    ${CMAKE_CURRENT_SOURCE_DIR}/../../libc
    ${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include
)

# Apply the flags accumulated in compile_options.
target_compile_options(libompdevice
  PRIVATE
    ${compile_options}
)
# SPIR-V targets link with -emit-llvm. Every other target performs a
# relocatable (-r) LTO link and forwards --lto-emit-llvm to the linker.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv")
  target_link_options(libompdevice PRIVATE
    "-nostdlib"
    "-emit-llvm"
  )
else()
  target_link_options(libompdevice PRIVATE
    "-flto"
    "-r"
    "-nostdlib"
    "-Wl,--lto-emit-llvm"
  )
endif()

# The link step needs the same target triple as the compile step.
if(LLVM_DEFAULT_TARGET_TRIPLE)
  target_link_options(libompdevice PRIVATE
    "--target=${LLVM_DEFAULT_TARGET_TRIPLE}"
  )
endif()
# Install the bitcode file into the OpenMP library directory with explicit
# permissions: owner read/write, group and world read.
install(
  TARGETS libompdevice
  PERMISSIONS
    OWNER_WRITE
    OWNER_READ
    GROUP_READ
    WORLD_READ
  DESTINATION ${OPENMP_INSTALL_LIBDIR}
)
# Expose the bitcode file produced by libompdevice as an imported object
# library so that it can be placed into an archive.
add_library(ompdevice.all_objs OBJECT IMPORTED)
set_target_properties(ompdevice.all_objs PROPERTIES
  IMPORTED_OBJECTS
    ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/libomptarget-${target_name}.bc
)

# Archive all the object files generated above into a static library. The
# explicit dependency makes libompdevice build before the archive is created.
add_library(ompdevice STATIC)
add_dependencies(ompdevice libompdevice)
set_property(TARGET ompdevice PROPERTY
  ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}")
set_property(TARGET ompdevice PROPERTY LINKER_LANGUAGE CXX)
target_link_libraries(ompdevice PRIVATE ompdevice.all_objs)
install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")