From fd069a46bf77227e058475508c679f3a004d7b36 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 10 Mar 2026 13:45:18 -0500 Subject: [PATCH] [compiler-rt] Initial support for building profile library on the GPU (#185552) Summary: As suggested in https://github.com/llvm/llvm-project/pull/177665, we should build a GPU version of the compiler-rt profile library instead of writing it in-line in the lowering. This PR does not define anything GPU-specific; it simply reuses the baremetal handling. Later PRs will introduce the GPU-specific handling we would want in order to optimize counter handling on the GPU. Note that existing users will need to use the cache file or set these options manually. Hopefully, if people are using the cache file as they should, it won't break anything. --- compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake | 4 +++- compiler-rt/cmake/base-config-ix.cmake | 6 ++---- compiler-rt/cmake/builtin-config-ix.cmake | 1 - compiler-rt/cmake/caches/GPU.cmake | 6 ++++-- compiler-rt/cmake/config-ix.cmake | 3 +++ compiler-rt/lib/builtins/CMakeLists.txt | 2 -- compiler-rt/lib/profile/CMakeLists.txt | 10 ++++++++++ offload/cmake/caches/AMDGPULibcBot.cmake | 2 +- offload/cmake/caches/FlangOffload.cmake | 4 ++-- offload/cmake/caches/Offload.cmake | 4 ++-- 10 files changed, 27 insertions(+), 15 deletions(-) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index c2de0d0f652e..c463771223f0 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -1,3 +1,4 @@ +set(AMDGPU amdgcn) set(ARM64 aarch64) set(ARM32 arm armhf) set(HEXAGON hexagon) @@ -6,6 +7,7 @@ set(X86_64 x86_64) set(LOONGARCH64 loongarch64) set(MIPS32 mips mipsel) set(MIPS64 mips64 mips64el) +set(NVPTX nvptx64) set(PPC32 powerpc powerpcspe) set(PPC64 powerpc64 powerpc64le) set(RISCV32 riscv32) @@ -91,7 +93,7 @@ set(ALL_NSAN_SUPPORTED_ARCH ${X86_64}) 
set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64}) set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} - ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} + ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${AMDGPU} ${NVPTX} ${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32}) set(ALL_CTX_PROFILE_SUPPORTED_ARCH ${X86_64}) if (OS_NAME MATCHES "FreeBSD") diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index 37dfa5534dfe..5ab351d98964 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -225,8 +225,7 @@ macro(test_targets) endif() elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn") test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib" - "-flto" "-fconvergent-functions" - "-Xclang -mcode-object-version=none") + "-flto" "-Xclang -mcode-object-version=none") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "hexagon") test_target_arch(hexagon "" "") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64") @@ -270,8 +269,7 @@ macro(test_targets) test_target_arch(mips64 "" "-mips64r2" "-mabi=64") endif() elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx") - test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto" - "-fconvergent-functions" "-c") + test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto" "-c") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm") if(WIN32) test_target_arch(arm "" "" "") diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 569582a35e7a..abaaeadf3bd0 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-c2y-extensions COMPILER_RT_HAS_WNO_C2Y_EXTE builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC) 
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG) builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG) -builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG) builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG) builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG) builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG) diff --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake index e448774cf145..9feccccc5ce1 100644 --- a/compiler-rt/cmake/caches/GPU.cmake +++ b/compiler-rt/cmake/caches/GPU.cmake @@ -1,8 +1,9 @@ # This file sets up a CMakeCache for GPU builds of compiler-rt. This supports -# amdgcn and nvptx builds targeting the builtins library. +# amdgcn and nvptx builds targeting the builtins and profile libraries. set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "") set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "") +set(COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "") set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "") set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "") @@ -10,9 +11,10 @@ set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "") set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "") set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "") set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "") -set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "") +set(COMPILER_RT_BUILD_PROFILE ON CACHE BOOL "") set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "") set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "") set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "") +set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "") diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 95cac0f8faa9..381e2e4b28ab 100644 --- a/compiler-rt/cmake/config-ix.cmake 
+++ b/compiler-rt/cmake/config-ix.cmake @@ -115,6 +115,9 @@ check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG) check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG) check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG) check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT) +check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG) +check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG) +check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG) if(NOT WIN32 AND NOT CYGWIN) # MinGW warns if -fvisibility-inlines-hidden is used. diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 7ce929657eb8..6c27f6d4d529 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -957,8 +957,6 @@ else () append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS) append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS) append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS) - append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG - -fconvergent-functions BUILTIN_CFLAGS) # AMDGPU targets want to use a generic ABI. 
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn") diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index ca2b97a3169a..4cc2610cec87 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -154,6 +154,16 @@ if(COMPILER_RT_PROFILE_BAREMETAL) -DCOMPILER_RT_PROFILE_BAREMETAL=1) endif() +if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx") + append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS) + append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS) + append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS) + if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn") + append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG + "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS) + endif() +endif() + if(MSVC) # profile historically has only been supported with the static runtime # on windows diff --git a/offload/cmake/caches/AMDGPULibcBot.cmake b/offload/cmake/caches/AMDGPULibcBot.cmake index 798f080a41ad..ffd4f7c9b1ad 100644 --- a/offload/cmake/caches/AMDGPULibcBot.cmake +++ b/offload/cmake/caches/AMDGPULibcBot.cmake @@ -17,6 +17,6 @@ set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "") set(CLANG_DEFAULT_RTLIB "compiler-rt" STRING "") set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "") -set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;openmp;libc;libcxxabi;libcxx" CACHE STRING "") set(RUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_TEST_JOBS 4 CACHE STRING "") diff --git a/offload/cmake/caches/FlangOffload.cmake b/offload/cmake/caches/FlangOffload.cmake index ed75df6bb7e1..d8e1c9265574 100644 --- 
a/offload/cmake/caches/FlangOffload.cmake +++ b/offload/cmake/caches/FlangOffload.cmake @@ -3,8 +3,8 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") -set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") -set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "") set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "") set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "") diff --git a/offload/cmake/caches/Offload.cmake b/offload/cmake/caches/Offload.cmake index 3747a1d3eb29..14cc63a50848 100644 --- a/offload/cmake/caches/Offload.cmake +++ b/offload/cmake/caches/Offload.cmake @@ -3,7 +3,7 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;libunwind;libcxx;libcxxabi;openmp;offload" set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") -set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") -set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES 
"${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "") set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")