Joseph Huber bb7ab2557c
[OpenMP] Port the OpenMP device runtime to direct C++ compilation (#123673)
Summary:
This removes the use of OpenMP offloading to build the device runtime.
The main benefit here is that we no longer need to rely on offloading
semantics to build a device-only runtime. Things like variants are
no longer needed and can be replaced by simple if-defs. In the future, I will
remove most of the special handling here and fold it into calls to the
`<gpuintrin.h>` functions instead. Additionally I will rework the
compilation to make this a separate runtime.

The current plan is to have this, but make including OpenMP and
offloading either automatically add it, or print a warning if it's
missing. This will allow us to use a normal CMake workflow and delete
all the weird 'let's pull the clang binary out of the build' business.
```
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=offload
-DLLVM_RUNTIME_TARGETS=amdgcn-amd-amdhsa
```

After that, linking the OpenMP device runtime will be `-Xoffload-linker
-lomp`. I.e. no more fat binary business.

Only look at the most recent commit since this includes the two
dependencies
(fix to AMDGPUEmitPrintfBinding and the PointerToMember bug).
2025-02-05 08:18:52 -06:00

97 lines
3.5 KiB
C++

//===--- DeviceUtils.h - OpenMP device runtime utility functions -- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
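//
// Freestanding type traits, a bit-cast helper, warp shuffle/ballot
// declarations, 64-bit pack/unpack declarations, address-space pointer queries
// and branch-hint macros for the OpenMP device runtime.
//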
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_DEVICERTL_DEVICE_UTILS_H
#define OMPTARGET_DEVICERTL_DEVICE_UTILS_H
#include "DeviceTypes.h"
#include "Shared/Utils.h"
namespace utils {
/// Identity metafunction: the nested alias `type` names \p T unchanged. Used
/// as the common base of the other freestanding traits that yield a type.
template <class T> struct type_identity { using type = T; };
/// Freestanding counterpart of std::integral_constant: wraps the compile-time
/// constant \p v of type \p T as a type.
///
/// Besides the static member `value`, an object of this type converts
/// implicitly to the wrapped value and can be called like a nullary function,
/// mirroring the standard trait so that both `Trait::value` and `Trait{}` work
/// in conditions.
template <typename T, T v> struct integral_constant {
  /// The wrapped constant.
  inline static constexpr T value = v;

  /// Type of the wrapped constant.
  using value_type = T;

  /// Implicit conversion to the wrapped constant.
  constexpr operator value_type() const noexcept { return value; }

  /// Function-call form; returns the wrapped constant.
  constexpr value_type operator()() const noexcept { return value; }
};
/// Freestanding SFINAE helpers.
///
/// remove_cv<T>::type is \p T with a top-level `const`, `volatile`, or
/// `const volatile` qualifier stripped; unqualified types pass through
/// unchanged. remove_cv_t<T> is shorthand for that nested type.
template <typename T> struct remove_cv : type_identity<T> {};
template <typename T> struct remove_cv<volatile T> : type_identity<T> {};
template <typename T> struct remove_cv<const T> : type_identity<T> {};
template <typename T> struct remove_cv<const volatile T> : type_identity<T> {};
template <typename T> using remove_cv_t = typename remove_cv<T>::type;
/// Boolean compile-time constants expressed as types.
using false_type = integral_constant<bool, false>;
using true_type = integral_constant<bool, true>;

/// is_same<A, B>::value is true only when \p A and \p B name exactly the same
/// type: the primary template derives from false_type and the partial
/// specialization for identical arguments derives from true_type.
template <class A, class B> struct is_same : false_type {};
template <class A> struct is_same<A, A> : true_type {};

/// Variable-template shorthand for is_same<A, B>::value.
template <class A, class B>
inline constexpr bool is_same_v = is_same<A, B>::value;
template <typename T> struct is_floating_point {
inline static constexpr bool value =
is_same_v<remove_cv_t<T>, float> || is_same_v<remove_cv_t<T>, double>;
};
template <typename T>
inline constexpr bool is_floating_point_v = is_floating_point<T>::value;
/// Freestanding enable_if. The primary template is only declared, so
/// enable_if<false, T> has no nested `type`; the `true` specialization exposes
/// \p T (default `void`) as `type`.
template <bool Cond, class T = void> struct enable_if;
template <class T> struct enable_if<true, T> : type_identity<T> {};

/// Shorthand for `typename enable_if<Cond, T>::type`.
template <bool Cond, class T = void>
using enable_if_t = typename enable_if<Cond, T>::type;
/// remove_addrspace<T>::type is \p T with a `[[clang::address_space(N)]]`
/// qualifier stripped; types that do not match the qualified form pass through
/// unchanged.
template <typename T> struct remove_addrspace : type_identity<T> {};
template <typename T, int AS>
struct remove_addrspace<T [[clang::address_space(AS)]]> : type_identity<T> {};

/// Shorthand for `typename remove_addrspace<T>::type`.
template <typename T>
using remove_addrspace_t = typename remove_addrspace<T>::type;
/// Reinterpret the object representation of \p V as a value of type \p To.
///
/// \tparam To   Destination type; must have the same size as \p From
///              (enforced by the static_assert below).
/// \tparam From Source type, deduced from \p V.
/// \returns A \p To holding the same bits as \p V.
///
/// The function is constexpr because __builtin_bit_cast can be evaluated at
/// compile time; callers may therefore use it in constant expressions as well
/// as at run time.
template <typename To, typename From> inline constexpr To bitCast(From V) {
  static_assert(sizeof(To) == sizeof(From), "Bad conversion");
  return __builtin_bit_cast(To, V);
}
/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
/// is identified by \p Mask.
/// NOTE(review): \p Width is not described by this header; presumably the
/// logical warp width the exchange operates on -- confirm against the
/// definition.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
/// 32-bit and 64-bit overloads of the "shuffle down" exchange; only the type
/// of \p Var and of the result differs between them.
/// NOTE(review): presumably returns \p Var as held by the lane \p Delta
/// positions after the caller, restricted to the lanes in \p Mask -- confirm
/// against the definition.
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
/// NOTE(review): presumably a warp-wide ballot that returns a bitmask of the
/// lanes in \p Mask for which \p Pred is non-zero -- confirm against the
/// definition.
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
/// NOTE(review): going by the parameter names, \p LowBits presumably occupies
/// bits 0-31 and \p HighBits bits 32-63 -- confirm against the definition.
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
/// Unpack \p Val into \p LowBits and \p HighBits.
/// Both halves are returned through the reference parameters; the function
/// itself returns nothing.
void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
/// Only declared in this header; the definition is not visible here.
bool isSharedMemPtr(void *Ptr);
/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
/// Only declared in this header; the definition is not visible here.
bool isThreadLocalMemPtr(void *Ptr);
/// A pointer variable that has by design an `undef` value. Use with care.
/// The `[[clang::loader_uninitialized]]` attribute lets this const variable be
/// declared without an initializer, so its value must never be relied upon.
/// Because it is `static` at namespace scope, every translation unit that
/// includes this header gets its own copy.
[[clang::loader_uninitialized]] static void *const UndefPtr;
/// Branch-prediction hints. Each macro converts \p EXPR to bool and passes it
/// to __builtin_expect together with the expected outcome (true for
/// OMP_LIKELY, false for OMP_UNLIKELY). As preprocessor macros they are not
/// scoped by any enclosing namespace.
#define OMP_LIKELY(EXPR) __builtin_expect(static_cast<bool>(EXPR), true)
#define OMP_UNLIKELY(EXPR) __builtin_expect(static_cast<bool>(EXPR), false)
} // namespace utils
#endif