This PR adds support in the Level Zero plugin for executing constructors/destructors in device code. Because spirv-link has some limitations, it mimics the CUDA plugin's behavior, where the RTL constructs the device-side tables before invoking the kernel that executes them. The kernel and the other symbols needed to create the device tables are generated by the SPIRVCtorDtorLowering pass, to be added in #187509.
666 lines
24 KiB
C++
666 lines
24 KiB
C++
//===--- Level Zero Target RTL Implementation -----------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// GenericDevice instantiation for SPIR-V/Xe machine.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
|
|
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "PerThreadTable.h"
|
|
|
|
#include "AsyncQueue.h"
|
|
#include "L0Context.h"
|
|
#include "L0Program.h"
|
|
#include "PluginInterface.h"
|
|
#include "TLS.h"
|
|
|
|
namespace llvm::omp::target::plugin {
|
|
|
|
/// Pointer to an OpenMP interop object (`omp_interop_val_t`).
using OmpInteropTy = omp_interop_val_t *;
|
|
/// Forward declaration; L0DeviceTy::getPlugin() returns a reference to this
/// type.
class LevelZeroPluginTy;
|
|
|
|
// clang-format off
|
|
/// PCI device-ID constants. L0DeviceTy::isDiscrete() compares a device ID
/// masked with 0xFF00 against a subset of these enumerators.
enum class PCIIdTy : int32_t {
  None        = 0x0000,
  SKL         = 0x1900,
  KBL         = 0x5900,
  CFL         = 0x3E00,
  CFL_2       = 0x9B00,
  ICX         = 0x8A00,
  // NOTE(review): this value has a non-zero low byte, so it can never equal
  // an ID masked with 0xFF00 -- confirm the value is intended.
  TGL         = 0xFF20,
  TGL_2       = 0x9A00,
  DG1         = 0x4900,
  RKL         = 0x4C00,
  ADLS        = 0x4600,
  RTL         = 0xA700,
  MTL         = 0x7D00,
  PVC         = 0x0B00,
  DG2_ATS_M   = 0x4F00,
  DG2_ATS_M_2 = 0x5600,
  LNL         = 0x6400,
  BMG         = 0xE200,
};
|
|
|
|
/// Device architecture identifiers shared between compiler and runtime.
/// Every non-zero enumerator occupies a distinct single bit.
enum class DeviceArchTy : uint64_t {
  DeviceArch_None   = 0,
  DeviceArch_Gen    = 1ULL << 0, // 0x0001: Gen 9, Gen 11 or Xe
  DeviceArch_XeLPG  = 1ULL << 1, // 0x0002
  DeviceArch_XeHPC  = 1ULL << 2, // 0x0004
  DeviceArch_XeHPG  = 1ULL << 3, // 0x0008
  DeviceArch_Xe2LP  = 1ULL << 4, // 0x0010
  DeviceArch_Xe2HP  = 1ULL << 5, // 0x0020
  DeviceArch_x86_64 = 1ULL << 8  // 0x0100
};
|
|
// clang-format on
|
|
|
|
struct L0DeviceIdTy {
|
|
ze_device_handle_t zeId;
|
|
int32_t RootId;
|
|
int32_t SubId;
|
|
int32_t CCSId;
|
|
|
|
L0DeviceIdTy(ze_device_handle_t Device, int32_t RootId, int32_t SubId = -1,
|
|
int32_t CCSId = -1)
|
|
: zeId(Device), RootId(RootId), SubId(SubId), CCSId(CCSId) {}
|
|
};
|
|
|
|
class L0DeviceTLSTy {
|
|
/// Command list for each device.
|
|
ze_command_list_handle_t CmdList = nullptr;
|
|
|
|
/// Main copy command list for each device.
|
|
ze_command_list_handle_t CopyCmdList = nullptr;
|
|
|
|
/// Command queue for each device.
|
|
ze_command_queue_handle_t CmdQueue = nullptr;
|
|
|
|
/// Main copy command queue for each device.
|
|
ze_command_queue_handle_t CopyCmdQueue = nullptr;
|
|
|
|
/// Immediate command list for each device.
|
|
ze_command_list_handle_t ImmCmdList = nullptr;
|
|
|
|
/// Immediate copy command list for each device.
|
|
ze_command_list_handle_t ImmCopyCmdList = nullptr;
|
|
|
|
public:
|
|
L0DeviceTLSTy() = default;
|
|
~L0DeviceTLSTy() {
|
|
// assert all fields are nullptr on destruction.
|
|
assert(!CmdList && !CopyCmdList && !CmdQueue && !CopyCmdQueue &&
|
|
!ImmCmdList && !ImmCopyCmdList &&
|
|
"L0DeviceTLSTy destroyed without clearing resources");
|
|
}
|
|
|
|
L0DeviceTLSTy(const L0DeviceTLSTy &) = delete;
|
|
L0DeviceTLSTy(L0DeviceTLSTy &&Other) {
|
|
CmdList = std::exchange(Other.CmdList, nullptr);
|
|
CopyCmdList = std::exchange(Other.CopyCmdList, nullptr);
|
|
CmdQueue = std::exchange(Other.CmdQueue, nullptr);
|
|
CopyCmdQueue = std::exchange(Other.CopyCmdQueue, nullptr);
|
|
ImmCmdList = std::exchange(Other.ImmCmdList, nullptr);
|
|
ImmCopyCmdList = std::exchange(Other.ImmCopyCmdList, nullptr);
|
|
}
|
|
|
|
Error deinit() {
|
|
// destroy all lists and queues.
|
|
if (CmdList)
|
|
CALL_ZE_RET_ERROR(zeCommandListDestroy, CmdList);
|
|
if (CopyCmdList)
|
|
CALL_ZE_RET_ERROR(zeCommandListDestroy, CopyCmdList);
|
|
if (ImmCmdList)
|
|
CALL_ZE_RET_ERROR(zeCommandListDestroy, ImmCmdList);
|
|
if (ImmCopyCmdList)
|
|
CALL_ZE_RET_ERROR(zeCommandListDestroy, ImmCopyCmdList);
|
|
if (CmdQueue)
|
|
CALL_ZE_RET_ERROR(zeCommandQueueDestroy, CmdQueue);
|
|
if (CopyCmdQueue)
|
|
CALL_ZE_RET_ERROR(zeCommandQueueDestroy, CopyCmdQueue);
|
|
|
|
CmdList = nullptr;
|
|
CopyCmdList = nullptr;
|
|
CmdQueue = nullptr;
|
|
CopyCmdQueue = nullptr;
|
|
ImmCmdList = nullptr;
|
|
ImmCopyCmdList = nullptr;
|
|
|
|
return Plugin::success();
|
|
}
|
|
|
|
L0DeviceTLSTy &operator=(const L0DeviceTLSTy &) = delete;
|
|
L0DeviceTLSTy &operator=(L0DeviceTLSTy &&) = delete;
|
|
|
|
ze_command_list_handle_t getCmdList() const { return CmdList; }
|
|
void setCmdList(ze_command_list_handle_t _CmdList) { CmdList = _CmdList; }
|
|
|
|
ze_command_list_handle_t getCopyCmdList() const { return CopyCmdList; }
|
|
void setCopyCmdList(ze_command_list_handle_t _CopyCmdList) {
|
|
CopyCmdList = _CopyCmdList;
|
|
}
|
|
|
|
ze_command_list_handle_t getImmCmdList() const { return ImmCmdList; }
|
|
void setImmCmdList(ze_command_list_handle_t ImmCmdListIn) {
|
|
ImmCmdList = ImmCmdListIn;
|
|
}
|
|
|
|
ze_command_list_handle_t getImmCopyCmdList() const { return ImmCopyCmdList; }
|
|
void setImmCopyCmdList(ze_command_list_handle_t ImmCopyCmdListIn) {
|
|
ImmCopyCmdList = ImmCopyCmdListIn;
|
|
}
|
|
|
|
ze_command_queue_handle_t getCmdQueue() const { return CmdQueue; }
|
|
void setCmdQueue(ze_command_queue_handle_t CmdQueueIn) {
|
|
CmdQueue = CmdQueueIn;
|
|
}
|
|
|
|
ze_command_queue_handle_t getCopyCmdQueue() const { return CopyCmdQueue; }
|
|
void setCopyCmdQueue(ze_command_queue_handle_t CopyCmdQueueIn) {
|
|
CopyCmdQueue = CopyCmdQueueIn;
|
|
}
|
|
};
|
|
|
|
struct L0DeviceTLSTableTy
|
|
: public PerThreadContainer<std::vector<L0DeviceTLSTy>, 8> {
|
|
Error deinit() {
|
|
return PerThreadTable::deinit(
|
|
[](L0DeviceTLSTy &Entry) { return Entry.deinit(); });
|
|
}
|
|
};
|
|
|
|
class L0DeviceTy final : public GenericDeviceTy {
|
|
// Level Zero Context for this Device.
|
|
L0ContextTy &l0Context;
|
|
|
|
// Level Zero handle for this Device.
|
|
ze_device_handle_t zeDevice;
|
|
// Device Properties.
|
|
ze_device_properties_t DeviceProperties{};
|
|
ze_device_compute_properties_t ComputeProperties{};
|
|
ze_device_memory_properties_t MemoryProperties{};
|
|
ze_device_cache_properties_t CacheProperties{};
|
|
|
|
/// Devices' default target allocation kind for internal allocation.
|
|
int32_t AllocKind = TARGET_ALLOC_DEVICE;
|
|
|
|
DeviceArchTy DeviceArch = DeviceArchTy::DeviceArch_None;
|
|
|
|
std::string DeviceName;
|
|
|
|
/// Common indirect access flags for this device.
|
|
ze_kernel_indirect_access_flags_t IndirectAccessFlags = 0;
|
|
|
|
/// Device UUID for toplevel devices only.
|
|
std::string DeviceUuid;
|
|
|
|
/// L0 Device ID as string.
|
|
std::string zeId;
|
|
|
|
/// Command queue group ordinals for each device.
|
|
static constexpr uint32_t MaxOrdinal =
|
|
std::numeric_limits<decltype(MaxOrdinal)>::max();
|
|
std::pair<uint32_t, uint32_t> ComputeOrdinal{MaxOrdinal, 0};
|
|
/// Command queue group ordinals for copying.
|
|
std::pair<uint32_t, uint32_t> CopyOrdinal{MaxOrdinal, 0};
|
|
|
|
/// Command queue index for each device.
|
|
uint32_t ComputeIndex = 0;
|
|
|
|
bool IsAsyncEnabled = false;
|
|
|
|
/// Lock for this device.
|
|
std::mutex Mutex;
|
|
|
|
/// Contains all modules (possibly from multiple device images) to handle
|
|
/// dynamic link across multiple images
|
|
llvm::SmallVector<ze_module_handle_t> GlobalModules;
|
|
|
|
/// L0 programs created for this device
|
|
std::list<L0ProgramTy> Programs;
|
|
|
|
/// MemAllocator for this device.
|
|
MemAllocatorTy MemAllocator;
|
|
|
|
DeviceArchTy computeArch() const;
|
|
|
|
/// Get default compute group ordinal. Returns Ordinal-NumQueues pair.
|
|
std::pair<uint32_t, uint32_t> findComputeOrdinal();
|
|
|
|
/// Get copy command queue group ordinal. Returns Ordinal-NumQueues pair.
|
|
std::pair<uint32_t, uint32_t> findCopyOrdinal(bool LinkCopy = false);
|
|
|
|
/// Helper function to call global constructors or destructors.
|
|
Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image,
|
|
bool IsCtor);
|
|
|
|
public:
|
|
L0DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
|
|
ze_device_handle_t zeDevice, L0ContextTy &DriverInfo,
|
|
const std::string_view zeId, int32_t ComputeIndex)
|
|
: GenericDeviceTy(Plugin, DeviceId, NumDevices, SPIRVGridValues),
|
|
l0Context(DriverInfo), zeDevice(zeDevice), zeId(zeId),
|
|
ComputeIndex(ComputeIndex) {
|
|
DeviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
|
|
DeviceProperties.pNext = nullptr;
|
|
ComputeProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES;
|
|
ComputeProperties.pNext = nullptr;
|
|
MemoryProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
|
|
MemoryProperties.pNext = nullptr;
|
|
CacheProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES;
|
|
CacheProperties.pNext = nullptr;
|
|
}
|
|
|
|
static L0DeviceTy &makeL0Device(GenericDeviceTy &Device) {
|
|
return static_cast<L0DeviceTy &>(Device);
|
|
}
|
|
LevelZeroPluginTy &getPlugin() {
|
|
return reinterpret_cast<LevelZeroPluginTy &>(Plugin);
|
|
}
|
|
|
|
L0DeviceTLSTy &getTLS();
|
|
|
|
Error setContext() override { return Plugin::success(); }
|
|
Error initImpl(GenericPluginTy &Plugin) override;
|
|
Error deinitImpl() override;
|
|
ze_device_handle_t getZeDevice() const { return zeDevice; }
|
|
|
|
const L0ContextTy &getL0Context() const { return l0Context; }
|
|
L0ContextTy &getL0Context() { return l0Context; }
|
|
|
|
const std::string_view getName() const { return DeviceName; }
|
|
const char *getNameCStr() const { return DeviceName.c_str(); }
|
|
|
|
const char *getArchCStr() const;
|
|
|
|
const std::string_view getZeId() const { return zeId; }
|
|
const char *getZeIdCStr() const { return zeId.c_str(); }
|
|
|
|
std::mutex &getMutex() { return Mutex; }
|
|
|
|
uint32_t getComputeIndex() const { return ComputeIndex; }
|
|
ze_kernel_indirect_access_flags_t getIndirectFlags() const {
|
|
return IndirectAccessFlags;
|
|
}
|
|
|
|
size_t getNumGlobalModules() const { return GlobalModules.size(); }
|
|
void addGlobalModule(ze_module_handle_t Module) {
|
|
GlobalModules.push_back(Module);
|
|
}
|
|
ze_module_handle_t *getGlobalModulesArray() { return GlobalModules.data(); }
|
|
|
|
L0ProgramTy *getProgramFromImage(MemoryBufferRef Image) {
|
|
for (auto &PGM : Programs)
|
|
if (PGM.getMemoryBuffer() == Image)
|
|
return &PGM;
|
|
return nullptr;
|
|
}
|
|
|
|
Error buildAllKernels() {
|
|
for (auto &PGM : Programs) {
|
|
if (auto Err = PGM.loadModuleKernels())
|
|
return Err;
|
|
}
|
|
return Plugin::success();
|
|
}
|
|
|
|
// add a new program to the device. Return a reference to the new program.
|
|
Expected<L0ProgramTy &> addProgram(int32_t ImageId,
|
|
L0ProgramBuilderTy &Builder) {
|
|
auto ImageOrErr = Builder.getELF();
|
|
if (!ImageOrErr)
|
|
return ImageOrErr.takeError();
|
|
Programs.emplace_back(ImageId, *this, std::move(*ImageOrErr),
|
|
Builder.getGlobalModule(),
|
|
std::move(Builder.getModules()));
|
|
return Programs.back();
|
|
}
|
|
|
|
const L0ProgramTy &getLastProgram() const { return Programs.back(); }
|
|
L0ProgramTy &getLastProgram() { return Programs.back(); }
|
|
// Device properties getters.
|
|
uint32_t getVendorId() const { return DeviceProperties.vendorId; }
|
|
bool isGPU() const { return DeviceProperties.type == ZE_DEVICE_TYPE_GPU; }
|
|
|
|
uint32_t getPCIId() const { return DeviceProperties.deviceId; }
|
|
uint32_t getNumThreadsPerEU() const {
|
|
return DeviceProperties.numThreadsPerEU;
|
|
}
|
|
uint32_t getSIMDWidth() const { return DeviceProperties.physicalEUSimdWidth; }
|
|
uint32_t getNumEUsPerSubslice() const {
|
|
return DeviceProperties.numEUsPerSubslice;
|
|
}
|
|
uint32_t getNumSubslicesPerSlice() const {
|
|
return DeviceProperties.numSubslicesPerSlice;
|
|
}
|
|
uint32_t getNumSlices() const { return DeviceProperties.numSlices; }
|
|
uint32_t getNumSubslices() const {
|
|
return DeviceProperties.numSubslicesPerSlice * DeviceProperties.numSlices;
|
|
}
|
|
uint32_t getNumEUs() const {
|
|
return DeviceProperties.numEUsPerSubslice * getNumSubslices();
|
|
}
|
|
uint32_t getTotalThreads() const {
|
|
return DeviceProperties.numThreadsPerEU * getNumEUs();
|
|
}
|
|
uint32_t getNumThreadsPerSubslice() const {
|
|
return getNumEUsPerSubslice() * getNumThreadsPerEU();
|
|
}
|
|
uint32_t getClockRate() const { return DeviceProperties.coreClockRate; }
|
|
|
|
uint32_t getMaxSharedLocalMemory() const {
|
|
return ComputeProperties.maxSharedLocalMemory;
|
|
}
|
|
uint32_t getMaxGroupSize() const {
|
|
return ComputeProperties.maxTotalGroupSize;
|
|
}
|
|
uint32_t getMaxGroupCount() const {
|
|
return getMaxGroupCountX() * getMaxGroupCountY() * getMaxGroupCountZ();
|
|
}
|
|
|
|
uint32_t getMaxGroupSizeX() const { return ComputeProperties.maxGroupSizeX; }
|
|
uint32_t getMaxGroupSizeY() const { return ComputeProperties.maxGroupSizeY; }
|
|
uint32_t getMaxGroupSizeZ() const { return ComputeProperties.maxGroupSizeZ; }
|
|
uint32_t getMaxGroupCountX() const {
|
|
return ComputeProperties.maxGroupCountX;
|
|
}
|
|
uint32_t getMaxGroupCountY() const {
|
|
return ComputeProperties.maxGroupCountY;
|
|
}
|
|
uint32_t getMaxGroupCountZ() const {
|
|
return ComputeProperties.maxGroupCountZ;
|
|
}
|
|
uint32_t getMemoryClockRate() const { return MemoryProperties.maxClockRate; }
|
|
uint64_t getGlobalMemorySize() const { return MemoryProperties.totalSize; }
|
|
size_t getCacheSize() const { return CacheProperties.cacheSize; }
|
|
uint64_t getMaxMemAllocSize() const {
|
|
return DeviceProperties.maxMemAllocSize;
|
|
}
|
|
|
|
int32_t getAllocKind() const { return AllocKind; }
|
|
DeviceArchTy getDeviceArch() const { return DeviceArch; }
|
|
bool isDeviceArch(DeviceArchTy Arch) const { return DeviceArch == Arch; }
|
|
|
|
static bool isDiscrete(uint32_t PCIId) {
|
|
switch (static_cast<PCIIdTy>(PCIId & 0xFF00)) {
|
|
case PCIIdTy::DG1:
|
|
case PCIIdTy::PVC:
|
|
case PCIIdTy::DG2_ATS_M:
|
|
case PCIIdTy::DG2_ATS_M_2:
|
|
case PCIIdTy::BMG:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool isDiscrete(ze_device_handle_t Device) {
|
|
ze_device_properties_t PR{};
|
|
PR.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
|
|
PR.pNext = nullptr;
|
|
CALL_ZE_RET(false, zeDeviceGetProperties, Device, &PR);
|
|
return isDiscrete(PR.deviceId);
|
|
}
|
|
|
|
bool isDiscreteDevice() { return isDiscrete(getPCIId()); }
|
|
bool isDeviceIPorNewer(uint32_t Version) const;
|
|
|
|
const std::string_view getUuid() const { return DeviceUuid; }
|
|
|
|
uint32_t getComputeEngine() const { return ComputeOrdinal.first; }
|
|
uint32_t getNumComputeQueues() const { return ComputeOrdinal.second; }
|
|
|
|
bool hasMainCopyEngine() const { return CopyOrdinal.first != MaxOrdinal; }
|
|
uint32_t getMainCopyEngine() const { return CopyOrdinal.first; }
|
|
|
|
bool deviceRequiresImmCmdList() const {
|
|
constexpr uint32_t BMGIP = 0x05004000;
|
|
return isDeviceIPorNewer(BMGIP);
|
|
}
|
|
bool asyncEnabled() const { return IsAsyncEnabled; }
|
|
bool useImmForCompute() const { return true; }
|
|
bool useImmForCopy() const { return true; }
|
|
bool useImmForInterop() const { return true; }
|
|
|
|
void reportDeviceInfo() const;
|
|
|
|
// Command queues related functions.
|
|
/// Create a command list with given ordinal and flags.
|
|
Expected<ze_command_list_handle_t>
|
|
createCmdList(ze_context_handle_t Context, ze_device_handle_t Device,
|
|
uint32_t Ordinal, ze_command_list_flags_t Flags,
|
|
const std::string_view DeviceIdStr);
|
|
|
|
/// Create a command list with default flags.
|
|
Expected<ze_command_list_handle_t>
|
|
createCmdList(ze_context_handle_t Context, ze_device_handle_t Device,
|
|
uint32_t Ordinal, const std::string_view DeviceIdStr);
|
|
|
|
Expected<ze_command_list_handle_t> getCmdList();
|
|
|
|
/// Create a command queue with given ordinal and flags.
|
|
Expected<ze_command_queue_handle_t>
|
|
createCmdQueue(ze_context_handle_t Context, ze_device_handle_t Device,
|
|
uint32_t Ordinal, uint32_t Index,
|
|
ze_command_queue_flags_t Flags,
|
|
const std::string_view DeviceIdStr);
|
|
|
|
/// Create a command queue with default flags.
|
|
Expected<ze_command_queue_handle_t>
|
|
createCmdQueue(ze_context_handle_t Context, ze_device_handle_t Device,
|
|
uint32_t Ordinal, uint32_t Index,
|
|
const std::string_view DeviceIdStr, bool InOrder = false);
|
|
|
|
/// Create a new command queue for the given OpenMP device ID.
|
|
Expected<ze_command_queue_handle_t> createCommandQueue(bool InOrder = false);
|
|
|
|
/// Create an immediate command list.
|
|
Expected<ze_command_list_handle_t>
|
|
createImmCmdList(uint32_t Ordinal, uint32_t Index, bool InOrder = false);
|
|
|
|
/// Create an immediate command list for computing.
|
|
Expected<ze_command_list_handle_t> createImmCmdList(bool InOrder = false) {
|
|
return createImmCmdList(getComputeEngine(), getComputeIndex(), InOrder);
|
|
}
|
|
|
|
/// Create an immediate command list for copying.
|
|
Expected<ze_command_list_handle_t> createImmCopyCmdList();
|
|
Expected<ze_command_queue_handle_t> getCmdQueue();
|
|
Expected<ze_command_list_handle_t> getCopyCmdList();
|
|
Expected<ze_command_queue_handle_t> getCopyCmdQueue();
|
|
Expected<ze_command_list_handle_t> getImmCmdList();
|
|
Expected<ze_command_list_handle_t> getImmCopyCmdList();
|
|
|
|
/// Enqueue copy command.
|
|
Error enqueueMemCopy(void *Dst, const void *Src, size_t Size,
|
|
__tgt_async_info *AsyncInfo = nullptr,
|
|
bool UseCopyEngine = true);
|
|
|
|
/// Enqueue asynchronous copy command.
|
|
Error enqueueMemCopyAsync(void *Dst, const void *Src, size_t Size,
|
|
__tgt_async_info *AsyncInfo, bool CopyTo = true);
|
|
|
|
/// Enqueue fill command.
|
|
Error enqueueMemFill(void *Ptr, const void *Pattern, size_t PatternSize,
|
|
size_t Size);
|
|
|
|
/// Driver related functions.
|
|
|
|
/// Reurn the driver handle for this device.
|
|
ze_driver_handle_t getZeDriver() const { return l0Context.getZeDriver(); }
|
|
|
|
/// Return context for this device.
|
|
ze_context_handle_t getZeContext() const { return l0Context.getZeContext(); }
|
|
|
|
/// Return driver API version for this device.
|
|
ze_api_version_t getDriverAPIVersion() const {
|
|
return l0Context.getDriverAPIVersion();
|
|
}
|
|
|
|
/// Return an event from the driver associated to this device.
|
|
Expected<ze_event_handle_t> getEvent() {
|
|
return l0Context.getEventPool().getEvent();
|
|
}
|
|
|
|
/// Release event to the pool associated to this device.
|
|
Error releaseEvent(ze_event_handle_t Event) {
|
|
return l0Context.getEventPool().releaseEvent(Event, *this);
|
|
}
|
|
|
|
StagingBufferTy &getStagingBuffer() { return l0Context.getStagingBuffer(); }
|
|
|
|
bool supportsLargeMem() const { return l0Context.supportsLargeMem(); }
|
|
|
|
// Allocation related routines.
|
|
|
|
/// Data alloc.
|
|
Expected<void *> dataAlloc(
|
|
size_t Size, size_t Align, int32_t Kind, intptr_t Offset, bool UserAlloc,
|
|
bool DevMalloc = false,
|
|
uint32_t MemAdvice = std::numeric_limits<decltype(MemAdvice)>::max(),
|
|
AllocOptionTy AllocOpt = AllocOptionTy::ALLOC_OPT_NONE);
|
|
|
|
/// Data delete.
|
|
Error dataDelete(void *Ptr);
|
|
|
|
/// Return the memory allocation type for the specified memory location.
|
|
uint32_t getMemAllocType(const void *Ptr) const;
|
|
|
|
const MemAllocatorTy &getDeviceMemAllocator() const { return MemAllocator; }
|
|
MemAllocatorTy &getDeviceMemAllocator() { return MemAllocator; }
|
|
|
|
MemAllocatorTy &getMemAllocator(int32_t Kind) {
|
|
if (Kind == TARGET_ALLOC_HOST)
|
|
return l0Context.getHostMemAllocator();
|
|
return getDeviceMemAllocator();
|
|
}
|
|
|
|
MemAllocatorTy &getMemAllocator(const void *Ptr) {
|
|
if (ZE_MEMORY_TYPE_HOST == getMemAllocType(Ptr))
|
|
return l0Context.getHostMemAllocator();
|
|
return getDeviceMemAllocator();
|
|
}
|
|
|
|
Error makeMemoryResident(void *Mem, size_t Size);
|
|
|
|
// Generic device interface implementation.
|
|
Expected<DeviceImageTy *>
|
|
loadBinaryImpl(std::unique_ptr<MemoryBuffer> &&TgtImage,
|
|
int32_t ImageId) override;
|
|
Error unloadBinaryImpl(DeviceImageTy *Image) override;
|
|
Expected<void *> allocate(size_t Size, void *HstPtr,
|
|
TargetAllocTy Kind) override;
|
|
Error free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) override;
|
|
|
|
/// This plugin does nothing to lock buffers. Do not return an error, just
|
|
/// return the same pointer as the device pointer.
|
|
Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) override {
|
|
return HstPtr;
|
|
}
|
|
Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); }
|
|
|
|
Expected<bool> isPinnedPtrImpl(void *, void *&, void *&,
|
|
size_t &) const override {
|
|
// Don't need to do anything, this is handled by the driver.
|
|
return false;
|
|
}
|
|
|
|
Expected<bool> isAccessiblePtrImpl(const void *Ptr, size_t Size) override;
|
|
Error dataFence(__tgt_async_info *Async) override;
|
|
Error dataFillImpl(void *TgtPtr, const void *PatternPtr, int64_t PatternSize,
|
|
int64_t Size,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
Error synchronizeImpl(__tgt_async_info &AsyncInfo,
|
|
bool ReleaseQueue) override;
|
|
Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
|
|
bool *IsQueueWorkCompleted) override;
|
|
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstDev,
|
|
void *DstPtr, int64_t Size,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
Expected<bool>
|
|
hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override;
|
|
|
|
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
|
|
AsyncInfoWrapperTy &AsyncInfo) override {
|
|
return Plugin::error(ErrorCode::UNIMPLEMENTED,
|
|
"enqueueHostCallImpl not implemented yet");
|
|
}
|
|
|
|
// Event routines are used to ensure ordering between dataTransfers. Instead
|
|
// of adding extra events in the queues, we make sure they're ordered by
|
|
// using the events from the data submission APIs so we don't need to support
|
|
// these routines.
|
|
// They still need to report succes to indicate the event are handled
|
|
// somewhere waitEvent and syncEvent should remain unimplemented.
|
|
Expected<bool> isEventCompleteImpl(void *EventPtr,
|
|
AsyncInfoWrapperTy &) override {
|
|
return true;
|
|
}
|
|
|
|
Error createEventImpl(void **EventPtrStorage) override {
|
|
return Plugin::success();
|
|
}
|
|
Error destroyEventImpl(void *EventPtr) override { return Plugin::success(); }
|
|
Error recordEventImpl(void *EventPtr,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
|
|
return Plugin::success();
|
|
}
|
|
|
|
Error waitEventImpl(void *EventPtr,
|
|
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
|
|
return Plugin::error(error::ErrorCode::UNKNOWN, "%s not implemented yet\n",
|
|
__func__);
|
|
}
|
|
|
|
Error syncEventImpl(void *EventPtr) override {
|
|
return Plugin::error(error::ErrorCode::UNKNOWN, "%s not implemented yet\n",
|
|
__func__);
|
|
}
|
|
|
|
Expected<InfoTreeNode> obtainInfoImpl() override;
|
|
uint64_t getClockFrequency() const override { return getClockRate(); }
|
|
uint64_t getHardwareParallelism() const override { return getTotalThreads(); }
|
|
Error getDeviceMemorySize(uint64_t &DSize) override {
|
|
DSize = getGlobalMemorySize();
|
|
return Plugin::success();
|
|
}
|
|
|
|
Error getDeviceStackSize(uint64_t &V) override {
|
|
V = 0;
|
|
return Plugin::success();
|
|
}
|
|
Expected<GenericKernelTy &> constructKernel(const char *Name) override;
|
|
|
|
Error callGlobalConstructors(GenericPluginTy &Plugin,
|
|
DeviceImageTy &Image) override;
|
|
|
|
Error callGlobalDestructors(GenericPluginTy &Plugin,
|
|
DeviceImageTy &Image) override;
|
|
|
|
Error setDeviceStackSize(uint64_t V) override { return Plugin::success(); }
|
|
|
|
Expected<omp_interop_val_t *>
|
|
createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override;
|
|
Error releaseInterop(omp_interop_val_t *Interop) override;
|
|
|
|
interop_spec_t selectInteropPreference(int32_t InteropType,
|
|
int32_t NumPrefers,
|
|
interop_spec_t *Prefers) override;
|
|
};
|
|
|
|
} // namespace llvm::omp::target::plugin
|
|
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
|