diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index 4cc2610cec87..86328b4c1392 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -74,6 +74,7 @@ set(PROFILE_SOURCES InstrProfilingPlatformLinux.c InstrProfilingPlatformOther.c InstrProfilingPlatformWindows.c + InstrProfilingPlatformGPU.c ) if (NOT COMPILER_RT_PROFILE_BAREMETAL) diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 187ef55ef378..54013d7e6568 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -166,6 +166,16 @@ void __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, uint32_t CounterIndex, uint64_t CounterValue); +/*! + * \brief Wave-cooperative counter increment for GPU targets. + * + * Reduces per-lane atomic contention by electing a single lane per wave to + * perform the counter update. \c Uniform is an optional counter tracking the + * increments made by fully occupied (uniform) waves. + */ +void __llvm_profile_instrument_gpu(uint64_t *Counter, uint64_t *Uniform, + uint64_t Step); + /*! * \brief Write instrumentation data to the current file. * diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformGPU.c b/compiler-rt/lib/profile/InstrProfilingPlatformGPU.c new file mode 100644 index 000000000000..78bf512f8c44 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingPlatformGPU.c @@ -0,0 +1,42 @@ +/*===- InstrProfilingPlatformGPU.c - GPU profiling support ----------------===*\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ + +// GPU-specific profiling functions for AMDGPU and NVPTX targets. 
This file +// provides the wave-cooperative counter increment. +// +// Platform plumbing (section boundaries, binary IDs, VNodes) is handled by +// InstrProfilingPlatformLinux.c via the COMPILER_RT_PROFILE_BAREMETAL path. + +#if defined(__NVPTX__) || defined(__AMDGPU__) + +#include "InstrProfiling.h" +#include <gpuintrin.h> + +// Indicates that the current wave is fully occupied. +static int is_uniform(uint64_t mask) { + const uint64_t uniform_mask = ~0ull >> (64 - __gpu_num_lanes()); + return mask == uniform_mask; +} + +// Wave-cooperative counter increment. The instrumentation pass emits calls to +// this in place of the default non-atomic load/add/store or atomicrmw sequence. +// The optional uniform counter allows calculating wave uniformity if present. +COMPILER_RT_VISIBILITY void __llvm_profile_instrument_gpu(uint64_t *counter, + uint64_t *uniform, + uint64_t step) { + uint64_t mask = __gpu_lane_mask(); + if (__gpu_is_first_in_lane(mask)) { + __scoped_atomic_fetch_add(counter, step * __builtin_popcountg(mask), + __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); + if (uniform && is_uniform(mask)) + __scoped_atomic_fetch_add(uniform, step * __builtin_popcountg(mask), + __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); + } +} + +#endif diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 199b7357fa86..d1696f4afbe3 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1192,8 +1192,19 @@ void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) { auto *Addr = getCounterAddress(Inc); IRBuilder<> Builder(Inc); - if (Options.Atomic || AtomicCounterUpdateAll || - (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) { + if (isGPUProfTarget(M)) { + auto *I64Ty = Builder.getInt64Ty(); + auto *PtrTy = Builder.getPtrTy(); + auto *CalleeTy = FunctionType::get(Type::getVoidTy(M.getContext()), + {PtrTy, PtrTy, I64Ty}, false); + auto Callee = + 
M.getOrInsertFunction("__llvm_profile_instrument_gpu", CalleeTy); + Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PtrTy); + Value *Uniform = + ConstantPointerNull::get(PointerType::getUnqual(M.getContext())); + Builder.CreateCall(Callee, {CastAddr, Uniform, Inc->getStep()}); + } else if (Options.Atomic || AtomicCounterUpdateAll || + (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) { Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), MaybeAlign(), AtomicOrdering::Monotonic); } else { diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt index 40da2a7d573e..434f25f512a4 100644 --- a/offload/test/CMakeLists.txt +++ b/offload/test/CMakeLists.txt @@ -12,12 +12,6 @@ else() set(LIBOMPTARGET_DEBUG False) endif() -if ("compiler-rt" IN_LIST LLVM_ENABLE_RUNTIMES) - set(LIBOMPTARGET_TEST_GPU_PGO True) -else() - set(LIBOMPTARGET_TEST_GPU_PGO False) -endif() - # Replace the space from user's input with ";" in case that CMake add escape # char into the lit command. string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}") diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 2d5d69167109..2226764f9aa9 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -2,6 +2,7 @@ # Configuration file for the 'lit' test runner. import os +import glob import lit.formats # Tell pylint that we know config and lit_config exist somewhere. 
@@ -132,7 +133,17 @@ if config.libomptarget_has_libc: config.available_features.add('libc') profdata_path = os.path.join(config.bin_llvm_tools_dir, "llvm-profdata") -if config.libomptarget_test_pgo: +target = config.libomptarget_current_target +for suffix in ['-JIT-LTO', '-LTO']: + if target.endswith(suffix): + target = target[:-len(suffix)] + break +has_profile_rt = True +if target.startswith('amdgcn') or target.startswith('nvptx'): + has_profile_rt = bool(glob.glob(os.path.join( + config.llvm_lib_directory, 'clang', '*', 'lib', target, + 'libclang_rt.profile.a'))) +if has_profile_rt: config.available_features.add('pgo') config.substitutions.append(("%profdata", profdata_path)) diff --git a/offload/test/lit.site.cfg.in b/offload/test/lit.site.cfg.in index c8ba45c9683e..47b1fbd18514 100644 --- a/offload/test/lit.site.cfg.in +++ b/offload/test/lit.site.cfg.in @@ -27,7 +27,6 @@ config.offload_device_info = "@OFFLOAD_DEVICE_INFO_EXECUTABLE@" config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@ -config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@ config.offload_tblgen = "@OFFLOAD_TBLGEN_EXECUTABLE@" # Let the main config do the real work. 
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/device/CMakeLists.txt b/openmp/device/CMakeLists.txt index 096a6fe0b6e7..ff5a64fdd2f0 100644 --- a/openmp/device/CMakeLists.txt +++ b/openmp/device/CMakeLists.txt @@ -16,7 +16,6 @@ set(src_files ${CMAKE_CURRENT_SOURCE_DIR}/src/Mapping.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Misc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Parallelism.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Profiling.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Reduction.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/State.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/Synchronization.cpp diff --git a/openmp/device/include/Profiling.h b/openmp/device/include/Profiling.h deleted file mode 100644 index d99475225412..000000000000 --- a/openmp/device/include/Profiling.h +++ /dev/null @@ -1,21 +0,0 @@ -//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_PROFILING_H -#define OMPTARGET_DEVICERTL_PROFILING_H - -extern "C" { -void __llvm_profile_register_function(void *Ptr); -void __llvm_profile_register_names_function(void *Ptr, long int I); -void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2); -} - -#endif diff --git a/openmp/device/src/Profiling.cpp b/openmp/device/src/Profiling.cpp deleted file mode 100644 index df141af5ebee..000000000000 --- a/openmp/device/src/Profiling.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===------- Profiling.cpp ---------------------------------------- C++ ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Profiling.h" - -extern "C" { - -// Provides empty implementations for certain functions in compiler-rt -// that are emitted by the PGO instrumentation. -void __llvm_profile_register_function(void *Ptr) {} -void __llvm_profile_register_names_function(void *Ptr, long int I) {} -void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2) {} -}