Summary: The changes in https://www.github.com/llvm/llvm-project/pull/185552 allowed us to start building the standard `libclang_rt.profile.a` for GPU targets. This PR expands this by adding an optimized GPU routine for counter increment and removing the special-case handling of these functions in the OpenMP runtime. The vast majority of these functions are boilerplate, but we should be able to do more interesting things with this in the future, like value or memory profiling.
205 lines
5.0 KiB
CMake
205 lines
5.0 KiB
CMake
|
|
# Probe for atomic primitives: the Interlocked*/MemoryBarrier family when
# _WIN32 is defined, the __sync_* builtins otherwise. The result is stored in
# COMPILER_RT_TARGET_HAS_ATOMICS.
check_cxx_source_compiles("
#ifdef _WIN32
#include <intrin.h> /* Workaround for PR19898. */
#include <windows.h>
#endif
int main() {
#ifdef _WIN32
volatile LONG val = 1;
MemoryBarrier();
InterlockedCompareExchange(&val, 0, 1);
InterlockedIncrement(&val);
InterlockedDecrement(&val);
#else
volatile unsigned long val = 1;
__sync_synchronize();
__sync_val_compare_and_swap(&val, 1, 0);
__sync_add_and_fetch(&val, 1);
__sync_sub_and_fetch(&val, 1);
#endif
return 0;
}
" COMPILER_RT_TARGET_HAS_ATOMICS)

# Probe for fcntl()-based file locking (struct flock, F_WRLCK, F_SETLKW).
# The result is stored in COMPILER_RT_TARGET_HAS_FCNTL_LCK.
check_cxx_source_compiles("
#if defined(__linux__)
#include <unistd.h>
#endif
#include <fcntl.h>
int fd;
int main() {
struct flock s_flock;

s_flock.l_type = F_WRLCK;
fcntl(fd, F_SETLKW, &s_flock);
return 0;
}

" COMPILER_RT_TARGET_HAS_FCNTL_LCK)

# Probe for flock() with LOCK_EX from <sys/file.h>.
# The result is stored in COMPILER_RT_TARGET_HAS_FLOCK.
check_cxx_source_compiles("
#include <sys/file.h>

int fd;
int main() {
flock(fd, LOCK_EX);
return 0;
}

" COMPILER_RT_TARGET_HAS_FLOCK)

# Probe for the <sys/utsname.h> header. Note that the test program only
# includes the header; it does not call uname() itself.
# The result is stored in COMPILER_RT_TARGET_HAS_UNAME.
check_cxx_source_compiles("
#include <sys/utsname.h>
int main() {
return 0;
}

" COMPILER_RT_TARGET_HAS_UNAME)

# Declare the `profile` component; the runtime built at the end of this file
# names it as its PARENT_TARGET.
add_compiler_rt_component(profile)

# Sources that are always part of the profile runtime.
set(PROFILE_SOURCES
  # Core instrumentation support.
  InstrProfiling.c
  InstrProfilingInternal.c
  InstrProfilingBuffer.c
  InstrProfilingMerge.c
  InstrProfilingMergeFile.c
  InstrProfilingNameVar.c
  InstrProfilingVersionVar.c
  InstrProfilingWriter.c
  # Per-platform pieces; all of them are listed unconditionally here.
  InstrProfilingPlatformAIX.c
  InstrProfilingPlatformDarwin.c
  InstrProfilingPlatformFuchsia.c
  InstrProfilingPlatformLinux.c
  InstrProfilingPlatformOther.c
  InstrProfilingPlatformWindows.c
  InstrProfilingPlatformGPU.c
)

# Sources added only when COMPILER_RT_PROFILE_BAREMETAL is not set.
# A baremetal build leaves out:
#   - anything that performs filesystem operations (InstrProfilingFile.c,
#     InstrProfilingUtil.c);
#   - initialization, because it isn't necessary without the filesystem bits
#     on ELF targets (InstrProfilingRuntime.cpp);
#   - value profiling, because it requires malloc (InstrProfilingValue.c);
#     this could be made optional if someone needs it;
#   - GCDA profiling, which is unrelated (GCDAProfiling.c).
if(NOT COMPILER_RT_PROFILE_BAREMETAL)
  list(APPEND PROFILE_SOURCES
    GCDAProfiling.c
    InstrProfilingFile.c
    InstrProfilingRuntime.cpp
    InstrProfilingUtil.c
    InstrProfilingValue.c
  )
endif()

# Headers handed to add_compiler_rt_runtime() as ADDITIONAL_HEADERS.
set(PROFILE_HEADERS
  InstrProfiling.h
  InstrProfilingInternal.h
  InstrProfilingPort.h
  InstrProfilingUtil.h
  WindowsMMap.h
)

# WindowsMMap.c is only compiled when targeting Windows.
if(WIN32)
  list(APPEND PROFILE_SOURCES WindowsMMap.c)
endif()

# Header search paths, relative to this directory: the parent directory
# first, then the include/ directory two levels up.
include_directories(
  ..
  ../../include
)

# Baseline compile flags on Fuchsia and UNIX-like hosts. This assignment
# (re)initializes EXTRA_FLAGS; every later block appends to it.
if(UNIX OR FUCHSIA)
  set(EXTRA_FLAGS -fPIC -Wno-pedantic)
endif()

# WASI builds get the _WASI_EMULATED_MMAN and _WASI_EMULATED_GETPID defines.
if(CMAKE_SYSTEM_NAME STREQUAL "WASI")
  list(APPEND EXTRA_FLAGS
    -D_WASI_EMULATED_MMAN
    -D_WASI_EMULATED_GETPID
  )
endif()

# Turn each successful COMPILER_RT_TARGET_HAS_<X> probe from the top of this
# file into a -DCOMPILER_RT_HAS_<X>=1 compile definition. The order
# (ATOMICS, FCNTL_LCK, FLOCK, UNAME) fixes the order of the resulting flags.
foreach(profile_feature ATOMICS FCNTL_LCK FLOCK UNAME)
  if(COMPILER_RT_TARGET_HAS_${profile_feature})
    list(APPEND EXTRA_FLAGS -DCOMPILER_RT_HAS_${profile_feature}=1)
  endif()
endforeach()

# Expose the baremetal configuration to the sources as a preprocessor define.
if(COMPILER_RT_PROFILE_BAREMETAL)
  list(APPEND EXTRA_FLAGS -DCOMPILER_RT_PROFILE_BAREMETAL=1)
endif()

# Extra flags when the default target architecture matches amdgcn or nvptx.
# Each flag is gated on its COMPILER_RT_HAS_*_FLAG variable through
# append_list_if (presumably: append only when that variable is true).
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
  append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG
    -ffreestanding EXTRA_FLAGS)
  append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG
    -nogpulib EXTRA_FLAGS)
  append_list_if(COMPILER_RT_HAS_FLTO_FLAG
    -flto EXTRA_FLAGS)

  # amdgcn only. The SHELL: prefix keeps "-Xclang <arg>" together as a pair
  # so that option de-duplication cannot split it.
  if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
    append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
      "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
  endif()
endif()

# On Windows, profile has historically been supported only with the static
# runtime, so select the static (MultiThreaded) MSVC runtime library here.
if(MSVC)
  set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
endif()

# The C++ Standard Library is not used here; pass -nostdinc++ (when the
# compiler supports it) so that it cannot be included by mistake.
append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ EXTRA_FLAGS)

# Remove any "-stdlib=<name>" / "--stdlib=<name>" option from CMAKE_CXX_FLAGS.
# NOTE(review): the previous comment here read "XRay uses C++ standard library
# headers.", which looks copied from another runtime's build file - confirm.
string(REGEX REPLACE
  "-?-stdlib=[a-zA-Z+]*" ""
  CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# MSVC warning suppressions, each added only when the matching
# COMPILER_RT_HAS_WD*_FLAG variable is set.

# C4221 - "nonstandard extension used : 'identifier' : cannot be initialized
# using address of automatic variable". This appears to be a C-only warning
# banning the use of locals in aggregate initializers; all other compilers
# accept this, though.
append_list_if(COMPILER_RT_HAS_WD4221_FLAG /wd4221 EXTRA_FLAGS)

# C4206 - "nonstandard extension used: translation unit is empty".
append_list_if(COMPILER_RT_HAS_WD4206_FLAG /wd4206 EXTRA_FLAGS)

# Build the static clang_rt.profile runtime as part of the `profile`
# component. Apple builds additionally pass the supported OS list; every
# other argument is the same on all platforms.
set(PROFILE_RUNTIME_OS_ARGS)
if(APPLE)
  set(PROFILE_RUNTIME_OS_ARGS OS ${PROFILE_SUPPORTED_OS})
endif()

add_compiler_rt_runtime(clang_rt.profile
  STATIC
  ${PROFILE_RUNTIME_OS_ARGS}
  ARCHS ${PROFILE_SUPPORTED_ARCH}
  CFLAGS ${EXTRA_FLAGS}
  SOURCES ${PROFILE_SOURCES}
  ADDITIONAL_HEADERS ${PROFILE_HEADERS}
  PARENT_TARGET profile)