Joseph Huber d18a784d41
[compiler-rt] Define GPU specific handling of profiling functions (#185763)
Summary:
The changes in https://www.github.com/llvm/llvm-project/pull/185552
allowed us to
start building the standard `libclang_rt.profile.a` for GPU targets.
This PR expands this by adding an optimized GPU routine for counter
increment and removing the special-case handling of these functions in
the OpenMP runtime.

The vast majority of these functions are boilerplate, but we should be able
to do more interesting things with this in the future, such as value or
memory profiling.
2026-03-19 10:51:48 -05:00

205 lines
5.0 KiB
CMake

# Probe for atomic primitives: MemoryBarrier and the Interlocked* functions
# when _WIN32 is defined, the __sync_* builtins otherwise. The outcome is
# stored in COMPILER_RT_TARGET_HAS_ATOMICS and later turned into the
# COMPILER_RT_HAS_ATOMICS compile definition.
check_cxx_source_compiles("
#ifdef _WIN32
#include <intrin.h> /* Workaround for PR19898. */
#include <windows.h>
#endif
int main() {
#ifdef _WIN32
volatile LONG val = 1;
MemoryBarrier();
InterlockedCompareExchange(&val, 0, 1);
InterlockedIncrement(&val);
InterlockedDecrement(&val);
#else
volatile unsigned long val = 1;
__sync_synchronize();
__sync_val_compare_and_swap(&val, 1, 0);
__sync_add_and_fetch(&val, 1);
__sync_sub_and_fetch(&val, 1);
#endif
return 0;
}
" COMPILER_RT_TARGET_HAS_ATOMICS)
# Probe for fcntl()-based record locking (struct flock, F_WRLCK, F_SETLKW).
# The outcome is stored in COMPILER_RT_TARGET_HAS_FCNTL_LCK and later turned
# into the COMPILER_RT_HAS_FCNTL_LCK compile definition.
check_cxx_source_compiles("
#if defined(__linux__)
#include <unistd.h>
#endif
#include <fcntl.h>
int fd;
int main() {
struct flock s_flock;
s_flock.l_type = F_WRLCK;
fcntl(fd, F_SETLKW, &s_flock);
return 0;
}
" COMPILER_RT_TARGET_HAS_FCNTL_LCK)
# Probe for flock() with LOCK_EX from <sys/file.h>. The outcome is stored in
# COMPILER_RT_TARGET_HAS_FLOCK and later turned into the COMPILER_RT_HAS_FLOCK
# compile definition.
check_cxx_source_compiles("
#include <sys/file.h>
int fd;
int main() {
flock(fd, LOCK_EX);
return 0;
}
" COMPILER_RT_TARGET_HAS_FLOCK)
# Probe for <sys/utsname.h>. Note that the test program only includes the
# header; it does not call uname() itself. The outcome is stored in
# COMPILER_RT_TARGET_HAS_UNAME and later turned into the COMPILER_RT_HAS_UNAME
# compile definition.
check_cxx_source_compiles("
#include <sys/utsname.h>
int main() {
return 0;
}
" COMPILER_RT_TARGET_HAS_UNAME)
# Register the "profile" component; the runtime declared at the end of this
# file names it as its PARENT_TARGET.
add_compiler_rt_component(profile)

# Sources that are compiled into the profile runtime in every configuration.
# Further sources are appended below depending on COMPILER_RT_PROFILE_BAREMETAL
# and WIN32.
set(PROFILE_SOURCES
  # Core runtime.
  InstrProfiling.c
  InstrProfilingInternal.c
  InstrProfilingBuffer.c
  InstrProfilingMerge.c
  InstrProfilingMergeFile.c
  InstrProfilingNameVar.c
  InstrProfilingVersionVar.c
  InstrProfilingWriter.c
  # One translation unit per platform; all of them are always listed.
  InstrProfilingPlatformAIX.c
  InstrProfilingPlatformDarwin.c
  InstrProfilingPlatformFuchsia.c
  InstrProfilingPlatformLinux.c
  InstrProfilingPlatformOther.c
  InstrProfilingPlatformWindows.c
  InstrProfilingPlatformGPU.c
)
# Hosted (non-baremetal) builds additionally compile the sources listed below.
# Builds with COMPILER_RT_PROFILE_BAREMETAL set leave them out:
# - Anything that contains filesystem operations (InstrProfilingFile.c,
#   InstrProfilingUtil.c).
# - Initialization, because it isn't necessary without the filesystem bits
#   on ELF targets (InstrProfilingRuntime.cpp).
# - Value profiling, because it requires malloc (InstrProfilingValue.c).
#   This could be optional if someone needs it.
# - GCDA profiling, which is unrelated (GCDAProfiling.c).
if(NOT COMPILER_RT_PROFILE_BAREMETAL)
  list(APPEND PROFILE_SOURCES
    GCDAProfiling.c
    InstrProfilingFile.c
    InstrProfilingRuntime.cpp
    InstrProfilingUtil.c
    InstrProfilingValue.c
  )
endif()
# Headers handed to the runtime target as ADDITIONAL_HEADERS.
set(PROFILE_HEADERS
  InstrProfiling.h
  InstrProfilingInternal.h
  InstrProfilingPort.h
  InstrProfilingUtil.h
  WindowsMMap.h
)

# WindowsMMap.c is only compiled when building for Windows.
if(WIN32)
  list(APPEND PROFILE_SOURCES WindowsMMap.c)
endif()
# Add the parent directory and the tree-level include/ directory to the header
# search path for everything built in this directory (same order as before).
include_directories(
  ..
  ../../include
)
# Assemble EXTRA_FLAGS, the compile flags later passed as CFLAGS to the
# runtime. The first assignment replaces any previous value; every later step
# appends to it.
if(FUCHSIA OR UNIX)
  set(EXTRA_FLAGS -fPIC -Wno-pedantic)
endif()

# Extra definitions for WASI targets.
if(CMAKE_SYSTEM_NAME STREQUAL "WASI")
  list(APPEND EXTRA_FLAGS
    -D_WASI_EMULATED_MMAN
    -D_WASI_EMULATED_GETPID)
endif()

# Translate the results of the configure-time probes into preprocessor
# definitions visible to the runtime sources.
if(COMPILER_RT_TARGET_HAS_ATOMICS)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_HAS_ATOMICS=1)
endif()

if(COMPILER_RT_TARGET_HAS_FCNTL_LCK)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_HAS_FCNTL_LCK=1)
endif()

if(COMPILER_RT_TARGET_HAS_FLOCK)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_HAS_FLOCK=1)
endif()

if(COMPILER_RT_TARGET_HAS_UNAME)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_HAS_UNAME=1)
endif()

# Let the sources know when the baremetal configuration was requested.
if(COMPILER_RT_PROFILE_BAREMETAL)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_PROFILE_BAREMETAL=1)
endif()
# Flags for GPU targets (default target arch matching amdgcn or nvptx). Each
# flag is guarded by its COMPILER_RT_HAS_* variable through append_list_if.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
  append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
  append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
  append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
endif()

# amdgcn only: additionally pass -mcode-object-version=none through -Xclang.
# This runs after the common GPU flags above, so the flag order is unchanged.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
  append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
    "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
endif()
# On Windows the profile runtime has historically only been supported with the
# static runtime, so select it for MSVC builds of this directory.
if(MSVC)
  set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
endif()
# The C++ Standard Library is not used here, so keep its headers off the
# include path to avoid pulling them in by mistake.
append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ EXTRA_FLAGS)

# Drop any -stdlib=<name> (or --stdlib=<name>) selection from CMAKE_CXX_FLAGS
# for this directory.
string(REGEX REPLACE "-?-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Suppress C4221: "nonstandard extension used : 'identifier' : cannot be
# initialized using address of automatic variable". This appears to be a C-only
# warning banning the use of locals in aggregate initializers; all other
# compilers accept the construct.
append_list_if(COMPILER_RT_HAS_WD4221_FLAG /wd4221 EXTRA_FLAGS)

# Suppress C4206: "nonstandard extension used: translation unit is empty".
append_list_if(COMPILER_RT_HAS_WD4206_FLAG /wd4206 EXTRA_FLAGS)
# Declare the static clang_rt.profile runtime. Apple builds additionally pass
# the supported OS list; every other argument is shared by both configurations.
if(APPLE)
  set(profile_runtime_os_args OS ${PROFILE_SUPPORTED_OS})
else()
  set(profile_runtime_os_args)
endif()

add_compiler_rt_runtime(clang_rt.profile
  STATIC
  ${profile_runtime_os_args}
  ARCHS ${PROFILE_SUPPORTED_ARCH}
  CFLAGS ${EXTRA_FLAGS}
  SOURCES ${PROFILE_SOURCES}
  ADDITIONAL_HEADERS ${PROFILE_HEADERS}
  PARENT_TARGET profile)