[compiler-rt] Define GPU specific handling of profiling functions (#185763)
Summary: The changes in https://www.github.com/llvm/llvm-project/pull/185552 allowed us to start building the standard `libclang_rt.profile.a` for GPU targets. This PR expands on this by adding an optimized GPU routine for counter increment and removing the special-case handling of these functions in the OpenMP runtime. The vast majority of these functions are boilerplate, but we should be able to do more interesting things with this in the future, like value or memory profiling.
This commit is contained in:
parent
923cc2d43b
commit
d18a784d41
@ -74,6 +74,7 @@ set(PROFILE_SOURCES
|
||||
InstrProfilingPlatformLinux.c
|
||||
InstrProfilingPlatformOther.c
|
||||
InstrProfilingPlatformWindows.c
|
||||
InstrProfilingPlatformGPU.c
|
||||
)
|
||||
|
||||
if (NOT COMPILER_RT_PROFILE_BAREMETAL)
|
||||
|
||||
@ -166,6 +166,16 @@ void __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
|
||||
uint32_t CounterIndex,
|
||||
uint64_t CounterValue);
|
||||
|
||||
/*!
|
||||
* \brief Wave-cooperative counter increment for GPU targets.
|
||||
*
|
||||
* Reduces per-lane atomic contention by electing a single lane per wave to
|
||||
* perform the counter update. \c Uniform is an optional counter tracking the
|
||||
* increments contributed by fully occupied (uniform) waves; it may be null.
|
||||
*/
|
||||
void __llvm_profile_instrument_gpu(uint64_t *Counter, uint64_t *Uniform,
|
||||
uint64_t Step);
|
||||
|
||||
/*!
|
||||
* \brief Write instrumentation data to the current file.
|
||||
*
|
||||
|
||||
42
compiler-rt/lib/profile/InstrProfilingPlatformGPU.c
Normal file
42
compiler-rt/lib/profile/InstrProfilingPlatformGPU.c
Normal file
@ -0,0 +1,42 @@
|
||||
/*===- InstrProfilingPlatformGPU.c - GPU profiling support ----------------===*\
|
||||
|*
|
||||
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
|* See https://llvm.org/LICENSE.txt for license information.
|
||||
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|*
|
||||
\*===----------------------------------------------------------------------===*/
|
||||
|
||||
// GPU-specific profiling functions for AMDGPU and NVPTX targets. This file
|
||||
// provides the wave-cooperative counter increment routine.
|
||||
//
|
||||
// Platform plumbing (section boundaries, binary IDs, VNodes) is handled by
|
||||
// InstrProfilingPlatformLinux.c via the COMPILER_RT_PROFILE_BAREMETAL path.
|
||||
|
||||
#if defined(__NVPTX__) || defined(__AMDGPU__)
|
||||
|
||||
#include "InstrProfiling.h"
|
||||
#include <gpuintrin.h>
|
||||
|
||||
// Indicates that the current wave is fully occupied: returns nonzero when
// `mask` is exactly the value with the low __gpu_num_lanes() bits set, and
// zero for any other mask.
|
||||
static int is_uniform(uint64_t mask) {
|
||||
// Build the "all lanes active" mask by shifting an all-ones 64-bit value
// right. NOTE(review): assumes __gpu_num_lanes() is in [1, 64]; a value of 0
// would give a shift count of 64, which is undefined for a 64-bit operand.
const uint64_t uniform_mask = ~0ull >> (64 - __gpu_num_lanes());
|
||||
return mask == uniform_mask;
|
||||
}
|
||||
|
||||
// Wave-cooperative counter increment. The instrumentation pass emits calls to
|
||||
// this in place of the default non-atomic load/add/store or atomicrmw sequence.
|
||||
// The optional uniform counter allows calculating wave uniformity if present.
//
// counter: counter that receives step multiplied by the number of bits set in
//          the lane mask.
// uniform: optional (may be null) counter that receives the same amount, but
//          only when is_uniform() reports that the lane mask is full.
// step:    per-lane increment. Only the value seen by the elected lane is
//          used. NOTE(review): presumably step is the same across the wave;
//          confirm against the instrumentation pass.
|
||||
COMPILER_RT_VISIBILITY void __llvm_profile_instrument_gpu(uint64_t *counter,
|
||||
uint64_t *uniform,
|
||||
uint64_t step) {
|
||||
// Mask returned by __gpu_lane_mask(); its population count scales the update.
uint64_t mask = __gpu_lane_mask();
|
||||
// Only the lane selected by __gpu_is_first_in_lane() issues the atomics, so
// the counter sees a single update per call rather than one per lane.
if (__gpu_is_first_in_lane(mask)) {
|
||||
// Relaxed ordering at device scope: only the atomicity of the add is
// requested here, not ordering against other memory operations.
__scoped_atomic_fetch_add(counter, step * __builtin_popcountg(mask),
|
||||
__ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
|
||||
// The uniform counter is skipped when the caller passed null or when the
// mask is not the full-wave mask.
if (uniform && is_uniform(mask))
|
||||
__scoped_atomic_fetch_add(uniform, step * __builtin_popcountg(mask),
|
||||
__ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -1192,8 +1192,19 @@ void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
|
||||
auto *Addr = getCounterAddress(Inc);
|
||||
|
||||
IRBuilder<> Builder(Inc);
|
||||
if (Options.Atomic || AtomicCounterUpdateAll ||
|
||||
(Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
|
||||
if (isGPUProfTarget(M)) {
|
||||
auto *I64Ty = Builder.getInt64Ty();
|
||||
auto *PtrTy = Builder.getPtrTy();
|
||||
auto *CalleeTy = FunctionType::get(Type::getVoidTy(M.getContext()),
|
||||
{PtrTy, PtrTy, I64Ty}, false);
|
||||
auto Callee =
|
||||
M.getOrInsertFunction("__llvm_profile_instrument_gpu", CalleeTy);
|
||||
Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PtrTy);
|
||||
Value *Uniform =
|
||||
ConstantPointerNull::get(PointerType::getUnqual(M.getContext()));
|
||||
Builder.CreateCall(Callee, {CastAddr, Uniform, Inc->getStep()});
|
||||
} else if (Options.Atomic || AtomicCounterUpdateAll ||
|
||||
(Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
|
||||
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
|
||||
MaybeAlign(), AtomicOrdering::Monotonic);
|
||||
} else {
|
||||
|
||||
@ -12,12 +12,6 @@ else()
|
||||
set(LIBOMPTARGET_DEBUG False)
|
||||
endif()
|
||||
|
||||
if ("compiler-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
|
||||
set(LIBOMPTARGET_TEST_GPU_PGO True)
|
||||
else()
|
||||
set(LIBOMPTARGET_TEST_GPU_PGO False)
|
||||
endif()
|
||||
|
||||
# Replace the space from user's input with ";" in case that CMake add escape
|
||||
# char into the lit command.
|
||||
string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}")
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
# Configuration file for the 'lit' test runner.
|
||||
|
||||
import os
|
||||
import glob
|
||||
import lit.formats
|
||||
|
||||
# Tell pylint that we know config and lit_config exist somewhere.
|
||||
@ -132,7 +133,17 @@ if config.libomptarget_has_libc:
|
||||
config.available_features.add('libc')
|
||||
|
||||
profdata_path = os.path.join(config.bin_llvm_tools_dir, "llvm-profdata")
|
||||
if config.libomptarget_test_pgo:
|
||||
target = config.libomptarget_current_target
|
||||
for suffix in ['-JIT-LTO', '-LTO']:
|
||||
if target.endswith(suffix):
|
||||
target = target[:-len(suffix)]
|
||||
break
|
||||
has_profile_rt = True
|
||||
if target.startswith('amdgcn') or target.startswith('nvptx'):
|
||||
has_profile_rt = bool(glob.glob(os.path.join(
|
||||
config.llvm_lib_directory, 'clang', '*', 'lib', target,
|
||||
'libclang_rt.profile.a')))
|
||||
if has_profile_rt:
|
||||
config.available_features.add('pgo')
|
||||
config.substitutions.append(("%profdata", profdata_path))
|
||||
|
||||
|
||||
@ -27,7 +27,6 @@ config.offload_device_info = "@OFFLOAD_DEVICE_INFO_EXECUTABLE@"
|
||||
config.libomptarget_debug = @LIBOMPTARGET_DEBUG@
|
||||
config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@
|
||||
config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@
|
||||
config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@
|
||||
config.offload_tblgen = "@OFFLOAD_TBLGEN_EXECUTABLE@"
|
||||
# Let the main config do the real work.
|
||||
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
|
||||
|
||||
@ -16,7 +16,6 @@ set(src_files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Mapping.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Misc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Parallelism.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Profiling.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Reduction.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/State.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/Synchronization.cpp
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OMPTARGET_DEVICERTL_PROFILING_H
|
||||
#define OMPTARGET_DEVICERTL_PROFILING_H
|
||||
|
||||
extern "C" {
|
||||
void __llvm_profile_register_function(void *Ptr);
|
||||
void __llvm_profile_register_names_function(void *Ptr, long int I);
|
||||
void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2);
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -1,18 +0,0 @@
|
||||
//===------- Profiling.cpp ---------------------------------------- C++ ---===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Profiling.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Provides empty implementations for certain functions in compiler-rt
|
||||
// that are emitted by the PGO instrumentation.
|
||||
void __llvm_profile_register_function(void *Ptr) {}
|
||||
void __llvm_profile_register_names_function(void *Ptr, long int I) {}
|
||||
void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2) {}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user