Joseph Huber d18a784d41
[compiler-rt] Define GPU specific handling of profiling functions (#185763)
Summary:
The changes in https://www.github.com/llvm/llvm-project/pull/185552
allowed us to
start building the standard `libclang_rt.profile.a` for GPU targets.
This PR expands this by adding an optimized GPU routine for counter
increment and removing the special-case handling of these functions in
the OpenMP runtime.

The vast majority of these functions are boilerplate, but we should be able
to do more interesting things with this in the future, like value or
memory profiling.
2026-03-19 10:51:48 -05:00

464 lines
22 KiB
Python

# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
# Configuration file for the 'lit' test runner.
import os
import glob
import lit.formats
# Tell pylint that we know config and lit_config exist somewhere.
if 'PYLINT_IMPORT' in os.environ:
    config = object()
    lit_config = object()

# Variables copied verbatim from the environment of the lit process into the
# test environment, but only when they are actually set.
for passthrough_var in (
    # Use the CUDA device as suggested by the env
    'CUDA_VISIBLE_DEVICES',
    # Use the ROCR device as suggested by the env
    'ROCR_VISIBLE_DEVICES',
    # Allow running the tests with omptarget debug output
    'LIBOMPTARGET_DEBUG',
    # Allow running tests with attach auto treated as always
    'LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS',
    # Allow running the tests with nextgen plugins when available
    'LIBOMPTARGET_NEXTGEN_PLUGINS',
    'LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS',
    'OMP_TARGET_OFFLOAD',
    'HSA_ENABLE_SDMA',
):
    if passthrough_var in os.environ:
        config.environment[passthrough_var] = os.environ[passthrough_var]

# Architectures like gfx942 may or may not be APUs so an additional environment
# variable is required as some tests can be APU specific. Unlike the variables
# above, IS_APU is always defined for the tests and defaults to '0'.
config.environment['IS_APU'] = os.environ.get('IS_APU', '0')
# Set default environment variables for the tests. CHECK_OPENMP_ENV is read as
# a whitespace-separated list of NAME=VALUE entries.
if 'CHECK_OPENMP_ENV' in os.environ:
    for env in os.environ['CHECK_OPENMP_ENV'].split():
        # Split on the first '=' only, so that a value which itself contains
        # '=' is kept whole. An entry without any '=' defines the variable
        # with an empty value instead of aborting with an IndexError.
        name, _sep, value = env.partition('=')
        config.environment[name] = value
def append_dynamic_library_path(name, value, sep):
    """Add `value` to the search-path variable `name` of the test environment.

    If the variable already exists, `value` is placed in FRONT of the current
    contents, joined with `sep` (so, despite the function name, the new entry
    takes precedence). Otherwise the variable is created holding just `value`.
    """
    try:
        current = config.environment[name]
    except KeyError:
        config.environment[name] = value
    else:
        config.environment[name] = value + sep + current
def prepend_executable_path(path):
    """Put `path` at the front of PATH in the test environment.

    When PATH is missing or empty, it is set to `path` alone so that no
    dangling separator is produced.
    """
    existing = config.environment.get('PATH')
    if existing:
        config.environment['PATH'] = f'{path}{os.path.pathsep}{existing}'
    else:
        config.environment['PATH'] = path
def evaluate_bool_env(env):
    """Interpret the string `env` as a boolean.

    Returns True when `env`, compared case-insensitively, is one of "on",
    "true" or "1"; every other string yields False.
    """
    return env.lower() in ("on", "true", "1")
# name: The name of this test suite.
config.name = 'libomptarget :: ' + config.libomptarget_current_target

# suffixes: A list of file extensions to treat as test files.
config.suffixes = ['.c', '.cpp', '.cc', '.f90', '.cu', '.td']

# excludes: A list of directories to exclude from the test suites.
config.excludes = ['Inputs', 'unit']

# test_source_root: The root path where tests are located.
config.test_source_root = os.path.dirname(__file__)

# test_exec_root: The root object directory where output is placed
config.test_exec_root = config.libomptarget_obj_root

# test format
config.test_format = lit.formats.ShTest()

# compiler flags: include directories first, then library search directories.
# Every entry is rendered as " <option> <directory>", so the resulting string
# starts with a blank.
config.test_flags = "".join(
    " " + option + " " + directory
    for option, directory in (
        ("-I", config.test_source_root),
        ("-I", config.omp_header_directory),
        ("-L", config.library_dir),
        ("-L", config.llvm_library_intdir),
        ("-L", config.llvm_lib_directory),
    )
)
# Some tests require LLVM tools to execute. These include:
#
#  * lld by JIT tests
#  * llvm-symbolizer by sanitizer tests
#
# Add the tools bin dir to the search path so they can find these executables.
# Prefer the tools from the configured LLVM install or bootstrapping build over
# system-installed versions which might be too old or not exist.
prepend_executable_path(config.bin_llvm_tools_dir)

# compiler specific flags
config.test_flags_clang = ""
config.test_flags_flang = "-fopenmp-version=52"

# The host runtime directory is optional; only search it when it is configured.
if config.omp_host_rtl_directory:
    config.test_flags += " -L " + config.omp_host_rtl_directory

# Extra flags supplied by the build configuration always go last.
config.test_flags += " " + config.test_extra_flags
# Allow REQUIRES / UNSUPPORTED / XFAIL to work
config.target_triple = [ ]
config.available_features.update(config.test_compiler_features)

# Features that depend on how the runtime was configured.
optional_features = (
    ('libomptarget-debug', config.libomptarget_debug),
    ('ompt', config.has_libomptarget_ompt),
    ('libc', config.libomptarget_has_libc),
)
config.available_features.update(
    feature_name for feature_name, enabled in optional_features if enabled)

# The target under test is itself a feature that tests can require.
config.available_features.add(config.libomptarget_current_target)

profdata_path = os.path.join(config.bin_llvm_tools_dir, "llvm-profdata")

# Drop the LTO decoration from the target name. The longer suffix is tested
# first because a name ending in '-JIT-LTO' also ends in '-LTO'.
target = config.libomptarget_current_target
if target.endswith('-JIT-LTO'):
    target = target[:-len('-JIT-LTO')]
elif target.endswith('-LTO'):
    target = target[:-len('-LTO')]

# For amdgcn / nvptx targets the 'pgo' feature is only offered when a
# libclang_rt.profile.a exists below <llvm lib dir>/clang/*/lib/<target>.
# All other targets are assumed to have the profile runtime.
if target.startswith(('amdgcn', 'nvptx')):
    profile_rt_pattern = os.path.join(
        config.llvm_lib_directory, 'clang', '*', 'lib', target,
        'libclang_rt.profile.a')
    has_profile_rt = bool(glob.glob(profile_rt_pattern))
else:
    has_profile_rt = True

if has_profile_rt:
    config.available_features.add('pgo')
    config.substitutions.append(("%profdata", profdata_path))
# Determine whether the test system supports unified memory.
# For CUDA, this is the case with compute capability 70 (Volta) or higher.
# For all other targets, we currently assume it is.
supports_unified_shared_memory = True
supports_apu = False
supports_large_allocation_memory_pool = False

current_target = config.libomptarget_current_target
if current_target.startswith('nvptx'):
    try:
        # Only the first three characters of the configured arch are parsed.
        supports_unified_shared_memory = int(config.cuda_test_arch[:3]) >= 70
    except ValueError:
        # If the architecture is invalid, assume it is supported.
        supports_unified_shared_memory = True
elif current_target.startswith('amdgcn'):
    # amdgpu_test_arch contains a list of AMD GPUs in the system
    # only check the first one assuming that we will run the test on it.
    amdgpu_arch = config.amdgpu_test_arch
    if amdgpu_arch.startswith(("gfx90a", "gfx942", "gfx950")):
        supports_large_allocation_memory_pool = True
    else:
        supports_unified_shared_memory = False
    # A gfx942 device only counts as an APU when IS_APU says so.
    supports_apu = (amdgpu_arch.startswith("gfx942") and
                    evaluate_bool_env(config.environment['IS_APU']))

# Publish the detected capabilities as lit features.
for supported, feature_name in (
    (supports_unified_shared_memory, 'unified_shared_memory'),
    (supports_apu, 'apu'),
    (supports_large_allocation_memory_pool, 'large_allocation_memory_pool'),
):
    if supported:
        config.available_features.add(feature_name)
# Setup environment to find dynamic library at runtime
if config.operating_system == 'Windows':
    # Windows resolves DLLs through PATH, whose entries are ';'-separated.
    append_dynamic_library_path('PATH', config.library_dir, ";")
    append_dynamic_library_path('PATH', config.omp_host_rtl_directory, ";")
elif config.operating_system == 'Darwin':
    # DYLD_LIBRARY_PATH is a ':'-separated list. Both entries must use ':';
    # joining the second one with ';' would fuse the two directories into a
    # single, non-existent path.
    append_dynamic_library_path('DYLD_LIBRARY_PATH', config.library_dir, ":")
    append_dynamic_library_path('DYLD_LIBRARY_PATH',
                                config.omp_host_rtl_directory, ":")
    config.test_flags += " -Wl,-rpath," + config.library_dir
    config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory
else: # Unices
    # Bake the library locations into the test binaries through rpath.
    config.test_flags += " -Wl,-rpath," + config.library_dir
    config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory
    config.test_flags += " -Wl,-rpath," + config.llvm_library_intdir
    config.test_flags += " -Wl,-rpath," + config.llvm_lib_directory
    if config.cuda_libdir:
        config.test_flags += " -Wl,-rpath," + config.cuda_libdir
    if config.libomptarget_current_target.startswith('nvptx'):
        config.test_flags_clang += " --libomptarget-nvptx-bc-path=" + config.llvm_library_intdir + "/nvptx64-nvidia-cuda"
    if config.libomptarget_current_target.endswith('-LTO'):
        config.test_flags += " -foffload-lto"
    # NOTE(review): the environment entry is indexed directly, so a -JIT-LTO
    # target presumably requires LIBOMPTARGET_NEXTGEN_PLUGINS to be set in the
    # test environment; confirm this is intended.
    if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
        config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
    ):
        config.test_flags += " -foffload-lto"
        config.test_flags += " -Wl,--embed-bitcode"
# Add platform targets
host_targets = [
    "aarch64-unknown-linux-gnu",
    "x86_64-unknown-linux-gnu",
    "s390x-ibm-linux-gnu",
]

# Every GPU target gets the common 'gpu' feature plus a vendor-specific one,
# chosen by the prefix of the target name.
for target_prefix, vendor_feature in (
    ('nvptx', 'nvidiagpu'),
    ('amdgcn', 'amdgpu'),
    ('spirv64-intel', 'intelgpu'),
):
    if config.libomptarget_current_target.startswith(target_prefix):
        config.available_features.add('gpu')
        config.available_features.add(vendor_feature)

# Host targets have to match one of the known triples exactly.
if config.libomptarget_current_target in host_targets:
    config.available_features.add('host')
def remove_suffix_if_present(name):
    """Return `name` without a trailing '-JIT-LTO' or '-LTO' decoration.

    Names that carry neither suffix are returned unchanged. The longer suffix
    must be tested first: every name ending in '-JIT-LTO' also ends in '-LTO',
    so testing '-LTO' first would strip only four characters and leave a
    dangling '-JIT' behind.
    """
    for suffix in ('-JIT-LTO', '-LTO'):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name
def add_libraries(source):
    """Append the device-library linker options to the command tail `source`.

    `source` is returned untouched when the current configuration has no
    'gpu' feature, or when it has the 'intelgpu' feature. Otherwise
    '-Xoffload-linker -lompdevice' is appended, preceded by the '-lc' and
    '-lm' options when config.libomptarget_has_libc is set.
    """
    features = config.available_features
    # SPIR-V uses an out-of-tree linker and libc doesn't work.
    if "gpu" not in features or "intelgpu" in features:
        return source

    device_libs = []
    if config.libomptarget_has_libc:
        device_libs += ["-lc", "-lm"]
    device_libs.append("-lompdevice")
    return source + "".join(" -Xoffload-linker " + lib for lib in device_libs)
# substitutions
# - the target under test gets real compile / run / check command lines.
# - every other valid target gets a placeholder command, so that the same test
#   file can carry RUN lines for several targets.
# NOTE(review): entries are appended in exactly the historical order; lit
# substitution is order-sensitive as far as I know, so confirm before
# reordering anything below.
for libomptarget_target in config.libomptarget_all_targets:
    if libomptarget_target != config.libomptarget_current_target:
        # Not the target under test: every target-specific macro collapses to
        # a harmless echo.
        for macro_prefix in (
            "%libomptarget-compile-run-and-check-",
            "%libomptarget-compile-fortran-run-and-check-",
            "%libomptarget-compilexx-run-and-check-",
            "%libomptarget-compile-and-run-",
            "%libomptarget-compile-fortran-and-run-",
            "%libomptarget-compilexx-and-run-",
            "%libomptarget-compilexx-",
            "%libomptarget-compile-",
            "%libomptarget-compile-fortran-",
            "%libomptarget-compileopt-run-and-check-",
            "%libomptarget-compileoptxx-run-and-check-",
            "%libomptarget-compileopt-and-run-",
            "%libomptarget-compileoptxx-and-run-",
            "%libomptarget-compileoptxx-",
            "%libomptarget-compileopt-",
            "%libomptarget-run-",
            "%libomptarget-run-fail-",
            "%clang-",
            "%clangxx-",
            "%fcheck-",
            "%flang-",
        ):
            config.substitutions.append(
                (macro_prefix + libomptarget_target, "echo ignored-command"))
        continue

    # From here on libomptarget_target is the target under test.
    tgt = libomptarget_target
    # FileCheck invocation that verifies against the test source file.
    check_source = config.libomptarget_filecheck + " %s"
    # Target name as handed to -fopenmp-targets (LTO decoration removed).
    offload_triple = remove_suffix_if_present(tgt)
    # Command tails: source -> executable, plus device libraries if needed.
    link_default = add_libraries(" %s -o %t")
    link_opt = add_libraries(" -O3 %s -o %t")

    target_rules = [
        # "generic" spellings forward to the spelling of the current target.
        ("%libomptarget-compilexx-run-and-check-generic",
         f"%libomptarget-compilexx-run-and-check-{tgt}"),
        ("%libomptarget-compile-run-and-check-generic",
         f"%libomptarget-compile-run-and-check-{tgt}"),
        ("%libomptarget-compile-fortran-run-and-check-generic",
         f"%libomptarget-compile-fortran-run-and-check-{tgt}"),
        ("%libomptarget-compilexx-and-run-generic",
         f"%libomptarget-compilexx-and-run-{tgt}"),
        ("%libomptarget-compile-and-run-generic",
         f"%libomptarget-compile-and-run-{tgt}"),
        ("%libomptarget-compilexx-generic",
         f"%libomptarget-compilexx-{tgt}"),
        ("%libomptarget-compilexxx-generic-force-usm",
         f"%libomptarget-compilexxx-force-usm-{tgt}"),
        ("%libomptarget-compile-generic",
         f"%libomptarget-compile-{tgt}"),
        ("%libomptarget-compile-fortran-generic",
         f"%libomptarget-compile-fortran-{tgt}"),
        ("%libomptarget-compileoptxx-run-and-check-generic",
         f"%libomptarget-compileoptxx-run-and-check-{tgt}"),
        ("%libomptarget-compileopt-run-and-check-generic",
         f"%libomptarget-compileopt-run-and-check-{tgt}"),
        ("%libomptarget-compileoptxx-and-run-generic",
         f"%libomptarget-compileoptxx-and-run-{tgt}"),
        ("%libomptarget-compileopt-and-run-generic",
         f"%libomptarget-compileopt-and-run-{tgt}"),
        ("%libomptarget-compileoptxx-generic",
         f"%libomptarget-compileoptxx-{tgt}"),
        ("%libomptarget-compileopt-generic",
         f"%libomptarget-compileopt-{tgt}"),
        ("%libomptarget-run-generic",
         f"%libomptarget-run-{tgt}"),
        ("%libomptarget-run-fail-generic",
         f"%libomptarget-run-fail-{tgt}"),
        ("%clangxx-generic", f"%clangxx-{tgt}"),
        ("%clang-generic", f"%clang-{tgt}"),
        ("%fcheck-generic", check_source),
        ("%fcheck-plain-generic", config.libomptarget_filecheck),

        # compile + run + FileCheck
        (f"%libomptarget-compilexx-run-and-check-{tgt}",
         f"%libomptarget-compilexx-and-run-{tgt} | {check_source}"),
        (f"%libomptarget-compile-run-and-check-{tgt}",
         f"%libomptarget-compile-and-run-{tgt} | {check_source}"),
        (f"%libomptarget-compile-fortran-run-and-check-{tgt}",
         f"%libomptarget-compile-fortran-and-run-{tgt} | {check_source}"),

        # compile + run
        (f"%libomptarget-compilexx-and-run-{tgt}",
         f"%libomptarget-compilexx-{tgt} && %libomptarget-run-{tgt}"),
        (f"%libomptarget-compile-and-run-{tgt}",
         f"%libomptarget-compile-{tgt} && %libomptarget-run-{tgt}"),
        (f"%libomptarget-compile-fortran-and-run-{tgt}",
         f"%libomptarget-compile-fortran-{tgt} && %libomptarget-run-{tgt}"),

        # compile only
        (f"%libomptarget-compilexx-{tgt}",
         f"%clangxx-{tgt}{link_default}"),
        (f"%libomptarget-compilexxx-force-usm-{tgt}",
         f"%clangxxx-force-usm-{tgt}{link_default}"),
        (f"%libomptarget-compile-{tgt}",
         f"%clang-{tgt}{link_default}"),
        (f"%libomptarget-compile-fortran-{tgt}",
         f"%flang-{tgt}{link_default}"),

        # optimized (-O3) variants
        (f"%libomptarget-compileoptxx-run-and-check-{tgt}",
         f"%libomptarget-compileoptxx-and-run-{tgt} | {check_source}"),
        (f"%libomptarget-compileopt-run-and-check-{tgt}",
         f"%libomptarget-compileopt-and-run-{tgt} | {check_source}"),
        (f"%libomptarget-compileoptxx-and-run-{tgt}",
         f"%libomptarget-compileoptxx-{tgt} && %libomptarget-run-{tgt}"),
        (f"%libomptarget-compileopt-and-run-{tgt}",
         f"%libomptarget-compileopt-{tgt} && %libomptarget-run-{tgt}"),
        (f"%libomptarget-compileoptxx-{tgt}",
         f"%clangxx-{tgt}{link_opt}"),
        (f"%libomptarget-compileopt-{tgt}",
         f"%clang-{tgt}{link_opt}"),

        # run the produced executable
        (f"%libomptarget-run-{tgt}", "%t"),
        (f"%libomptarget-run-fail-{tgt}", "%not --crash %t"),

        # compiler drivers for this target
        (f"%clangxx-{tgt}",
         "%clangxx %openmp_flags %cuda_flags %flags %flags_clang -fopenmp-targets=" + offload_triple),
        (f"%clangxxx-force-usm-{tgt}",
         "%clangxx %openmp_flags -fopenmp-force-usm %cuda_flags %flags %flags_clang -fopenmp-targets=" + offload_triple),
        (f"%clang-{tgt}",
         "%clang %openmp_flags %cuda_flags %flags %flags_clang -fopenmp-targets=" + offload_triple),
        (f"%flang-{tgt}",
         "%flang %openmp_flags %flags %flags_flang -fopenmp-targets=" + offload_triple),
        (f"%fcheck-{tgt}", check_source),
    ]
    for rule in target_rules:
        config.substitutions.append(rule)
# Target-independent substitutions. Longer names are registered before the
# names they start with (%clangxx before %clang, %flags_clang / %flags_flang
# before %flags), as in the historical order.
config.substitutions.extend([
    ("%clangxx", config.test_cxx_compiler),
    ("%clang", config.test_c_compiler),
])

# Fortran support is optional and depends on a configured compiler.
if config.test_fortran_compiler:
    config.available_features.add('flang')
    config.substitutions.append(("%flang", config.test_fortran_compiler))

# --cuda-path is only passed for nvptx targets with a configured CUDA path;
# otherwise %cuda_flags expands to nothing.
wants_cuda_path = (config.libomptarget_current_target.startswith('nvptx')
                   and config.cuda_path)
cuda_flags = "--cuda-path=" + config.cuda_path if wants_cuda_path else ""

config.substitutions.extend([
    ("%target_triple", config.libomptarget_current_target),
    ("%openmp_flags", config.test_openmp_flags),
    ("%cuda_flags", cuda_flags),
    ("%flags_clang", config.test_flags_clang),
    ("%flags_flang", config.test_flags_flang),
    ("%flags", config.test_flags),
    ("%not", config.libomptarget_not),
    ("%offload-device-info", config.offload_device_info),
    ("%offload-tblgen", config.offload_tblgen),
])