[copmiler-rt] Initial support for building profile library on the GPU (#185552)

Summary:
As suggested in https://github.com/llvm/llvm-project/pull/177665, we
should build a GPU version of the compiler-rt profile library instead of
writing it in-line in the lowering. This PR does not define anything GPU
specific, it simply re-uses the baremetal handling. Later PRs will
prevent the GPU specific handling we would want to do to optimize
counter handling on the GPU.

Note that this will require using the cache file, or setting these
options
manually for existing users. Hopefully if people are using the cache
file
as they should it won't break anything.
This commit is contained in:
Joseph Huber 2026-03-10 13:45:18 -05:00 committed by GitHub
parent 072e869c9c
commit fd069a46bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 27 additions and 15 deletions

View File

@ -1,3 +1,4 @@
set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf)
set(HEXAGON hexagon)
@ -6,6 +7,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
@ -91,7 +93,7 @@ set(ALL_NSAN_SUPPORTED_ARCH ${X86_64})
set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64}
${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${AMDGPU} ${NVPTX}
${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32})
set(ALL_CTX_PROFILE_SUPPORTED_ARCH ${X86_64})
if (OS_NAME MATCHES "FreeBSD")

View File

@ -225,8 +225,7 @@ macro(test_targets)
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
"-flto" "-fconvergent-functions"
"-Xclang -mcode-object-version=none")
"-flto" "-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "hexagon")
test_target_arch(hexagon "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
@ -270,8 +269,7 @@ macro(test_targets)
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto"
"-fconvergent-functions" "-c")
test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")

View File

@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-c2y-extensions COMPILER_RT_HAS_WNO_C2Y_EXTE
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)

View File

@ -1,8 +1,9 @@
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
# amdgcn and nvptx builds targeting the builtins library.
# amdgcn and nvptx builds targeting the builtins and profile libraries.
set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
set(COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "")
set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
@ -10,9 +11,10 @@ set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_PROFILE ON CACHE BOOL "")
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")

View File

@ -115,6 +115,9 @@ check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG)
check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG)
check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT)
check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
if(NOT WIN32 AND NOT CYGWIN)
# MinGW warns if -fvisibility-inlines-hidden is used.

View File

@ -957,8 +957,6 @@ else ()
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
-fconvergent-functions BUILTIN_CFLAGS)
# AMDGPU targets want to use a generic ABI.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")

View File

@ -154,6 +154,16 @@ if(COMPILER_RT_PROFILE_BAREMETAL)
-DCOMPILER_RT_PROFILE_BAREMETAL=1)
endif()
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
"SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
endif()
endif()
if(MSVC)
# profile historically has only been supported with the static runtime
# on windows

View File

@ -17,6 +17,6 @@ set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "")
set(CLANG_DEFAULT_RTLIB "compiler-rt" STRING "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;openmp;libc;libcxxabi;libcxx" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_TEST_JOBS 4 CACHE STRING "")

View File

@ -3,8 +3,8 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "")

View File

@ -3,7 +3,7 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;libunwind;libcxx;libcxxabi;openmp;offload"
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")