[OpenMP][Offload] Continue to update libomptarget debug messages (#170425)
* Add support to use lambdas to output debug messages (like LDBG_OS) * Update messages for interface.cpp and omptarget.cpp
This commit is contained in:
parent
130fa98a29
commit
02a908c4c9
@ -430,6 +430,52 @@ static inline raw_ostream &operator<<(raw_ostream &Os,
|
||||
#define ODBG_RESET_LEVEL() \
|
||||
static_cast<llvm::offload::debug::odbg_ostream::IfLevel>(0)
|
||||
|
||||
// helper templates to support lambdas with different number of arguments
|
||||
template <typename LambdaTy> struct LambdaHelper {
|
||||
template <typename T, typename = std::void_t<>>
|
||||
struct has_two_args : std::false_type {};
|
||||
template <typename T>
|
||||
struct has_two_args<T,
|
||||
std::void_t<decltype(std::declval<T>().operator()(1, 2))>>
|
||||
: std::true_type {};
|
||||
|
||||
static void dispatch(LambdaTy func, llvm::raw_ostream &Os, uint32_t Level) {
|
||||
if constexpr (has_two_args<LambdaTy>::value)
|
||||
func(Os, Level);
|
||||
else
|
||||
func(Os);
|
||||
}
|
||||
};
|
||||
|
||||
#define ODBG_OS_BASE(Stream, Component, Prefix, Type, Level, Callback) \
|
||||
if (::llvm::offload::debug::isDebugEnabled()) { \
|
||||
uint32_t RealLevel = (Level); \
|
||||
if (::llvm::offload::debug::shouldPrintDebug((Component), (Type), \
|
||||
RealLevel)) { \
|
||||
::llvm::offload::debug::odbg_ostream OS{ \
|
||||
::llvm::offload::debug::computePrefix((Prefix), (Type)), (Stream), \
|
||||
RealLevel, /*ShouldPrefixNextString=*/true, \
|
||||
/*ShouldEmitNewLineOnDestruction=*/true}; \
|
||||
auto F = Callback; \
|
||||
::llvm::offload::debug::LambdaHelper<decltype(F)>::dispatch(F, OS, \
|
||||
RealLevel); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ODBG_OS_STREAM(Stream, Type, Level, Callback) \
|
||||
ODBG_OS_BASE(Stream, GETNAME(TARGET_NAME), DEBUG_PREFIX, Type, Level, \
|
||||
Callback)
|
||||
#define ODBG_OS_3(Type, Level, Callback) \
|
||||
ODBG_OS_STREAM(llvm::offload::debug::dbgs(), Type, Level, Callback)
|
||||
#define ODBG_OS_2(Type, Callback) ODBG_OS_3(Type, 1, Callback)
|
||||
#define ODBG_OS_1(Callback) ODBG_OS_2("default", Callback)
|
||||
#define ODBG_OS_SELECT(Type, Level, Callback, NArgs, ...) ODBG_OS_##NArgs
|
||||
// Print a debug message of a certain type and verbosity level using a callback
|
||||
// to emit the message. If no type or level is provided, "default" and "1 are
|
||||
// assumed respectively.
|
||||
#define ODBG_OS(...) \
|
||||
ODBG_OS_SELECT(__VA_ARGS__ __VA_OPT__(, ) 3, 2, 1)(__VA_ARGS__)
|
||||
|
||||
#else
|
||||
|
||||
inline bool isDebugEnabled() { return false; }
|
||||
@ -446,6 +492,10 @@ inline bool isDebugEnabled() { return false; }
|
||||
#define ODBG_RESET_LEVEL() 0
|
||||
#define ODBG(...) ODBG_NULL
|
||||
|
||||
#define ODBG_OS_BASE(Stream, Component, Prefix, Type, Level, Callback)
|
||||
#define ODBG_OS_STREAM(Stream, Type, Level, Callback)
|
||||
#define ODBG_OS(...)
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace llvm::offload::debug
|
||||
@ -476,6 +526,9 @@ constexpr const char *ODT_DumpTable = "DumpTable";
|
||||
constexpr const char *ODT_MappingChanged = "MappingChanged";
|
||||
constexpr const char *ODT_PluginKernel = "PluginKernel";
|
||||
constexpr const char *ODT_EmptyMapping = "EmptyMapping";
|
||||
constexpr const char *ODT_Device = "Device";
|
||||
constexpr const char *ODT_Interface = "Interface";
|
||||
constexpr const char *ODT_Alloc = "Alloc";
|
||||
|
||||
static inline odbg_ostream reportErrorStream() {
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "Utils/ExponentialBackoff.h"
|
||||
|
||||
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
@ -35,6 +36,7 @@
|
||||
#ifdef OMPT_SUPPORT
|
||||
using namespace llvm::omp::target::ompt;
|
||||
#endif
|
||||
using namespace llvm::omp::target::debug;
|
||||
|
||||
// If offload is enabled, ensure that device DeviceID has been initialized.
|
||||
//
|
||||
@ -49,25 +51,25 @@ using namespace llvm::omp::target::ompt;
|
||||
// This step might be skipped if offload is disabled.
|
||||
bool checkDevice(int64_t &DeviceID, ident_t *Loc) {
|
||||
if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) {
|
||||
DP("Offload is disabled\n");
|
||||
ODBG(ODT_Device) << "Offload is disabled";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (DeviceID == OFFLOAD_DEVICE_DEFAULT) {
|
||||
DeviceID = omp_get_default_device();
|
||||
DP("Use default device id %" PRId64 "\n", DeviceID);
|
||||
ODBG(ODT_Device) << "Use default device id " << DeviceID;
|
||||
}
|
||||
|
||||
// Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
|
||||
if (omp_get_num_devices() == 0) {
|
||||
DP("omp_get_num_devices() == 0 but offload is manadatory\n");
|
||||
ODBG(ODT_Device) << "omp_get_num_devices() == 0 but offload is manadatory";
|
||||
handleTargetOutcome(false, Loc);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (DeviceID == omp_get_initial_device()) {
|
||||
DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",
|
||||
DeviceID);
|
||||
ODBG(ODT_Device) << "Device is host (" << DeviceID
|
||||
<< "), returning as if offload is disabled";
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -123,25 +125,25 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
|
||||
"NumArgs=" + std::to_string(ArgNum), Loc);
|
||||
|
||||
DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
|
||||
RegionName, DeviceId, ArgNum);
|
||||
ODBG(ODT_Interface) << "Entering data " << RegionName << " region for device "
|
||||
<< DeviceId << " with " << ArgNum << " mappings";
|
||||
|
||||
if (checkDevice(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
ODBG(ODT_Interface) << "Not offloading to device " << DeviceId;
|
||||
return;
|
||||
}
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
RegionTypeMsg);
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
|
||||
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
|
||||
}
|
||||
#endif
|
||||
ODBG_OS(ODT_Kernel, [&](llvm::raw_ostream &Os) {
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
Os << "Entry " << llvm::format_decimal(I, 2) << ": Base=" << ArgsBase[I]
|
||||
<< ", Begin=" << Args[I] << ", Size=" << ArgSizes[I]
|
||||
<< ", Type=" << llvm::format_hex(ArgTypes[I], 8) << ", Name="
|
||||
<< ((ArgNames) ? getNameFromMapping(ArgNames[I]) : "unknown") << "\n";
|
||||
}
|
||||
});
|
||||
|
||||
auto DeviceOrErr = PM->getDevice(DeviceId);
|
||||
if (!DeviceOrErr)
|
||||
@ -274,7 +276,7 @@ static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
|
||||
KernelArgsTy &LocalKernelArgs,
|
||||
int32_t NumTeams, int32_t ThreadLimit) {
|
||||
if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION)
|
||||
DP("Unexpected ABI version: %u\n", KernelArgs->Version);
|
||||
ODBG(ODT_Interface) << "Unexpected ABI version: " << KernelArgs->Version;
|
||||
|
||||
uint32_t UpgradedVersion = KernelArgs->Version;
|
||||
if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) {
|
||||
@ -326,12 +328,11 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
|
||||
assert(PM && "Runtime not initialized");
|
||||
static_assert(std::is_convertible_v<TargetAsyncInfoTy &, AsyncInfoTy &>,
|
||||
"Target AsyncInfoTy must be convertible to AsyncInfoTy.");
|
||||
DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
|
||||
"\n",
|
||||
DeviceId, DPxPTR(HostPtr));
|
||||
ODBG(ODT_Interface) << "Entering target region for device " << DeviceId
|
||||
<< " with entry point " << HostPtr;
|
||||
|
||||
if (checkDevice(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
ODBG(ODT_Interface) << "Not offloading to device " << DeviceId;
|
||||
return OMP_TGT_FAIL;
|
||||
}
|
||||
|
||||
@ -354,17 +355,21 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
|
||||
printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs,
|
||||
KernelArgs->ArgSizes, KernelArgs->ArgTypes,
|
||||
KernelArgs->ArgNames, "Entering OpenMP kernel");
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
|
||||
KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
|
||||
(KernelArgs->ArgNames)
|
||||
? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
|
||||
: "unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
ODBG_OS(ODT_Kernel, [&](llvm::raw_ostream &Os) {
|
||||
for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
|
||||
Os << "Entry " << llvm::format_decimal(I, 2)
|
||||
<< " Base=" << KernelArgs->ArgBasePtrs[I]
|
||||
<< ", Begin=" << KernelArgs->ArgPtrs[I]
|
||||
<< ", Size=" << KernelArgs->ArgSizes[I]
|
||||
<< ", Type=" << llvm::format_hex(KernelArgs->ArgTypes[I], 8)
|
||||
<< ", Name="
|
||||
<< (KernelArgs->ArgNames
|
||||
? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
|
||||
: "unknown")
|
||||
<< "\n";
|
||||
}
|
||||
});
|
||||
|
||||
auto DeviceOrErr = PM->getDevice(DeviceId);
|
||||
if (!DeviceOrErr)
|
||||
@ -463,7 +468,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
|
||||
assert(PM && "Runtime not initialized");
|
||||
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
|
||||
if (checkDevice(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
ODBG(ODT_Interface) << "Not offloading to device " << DeviceId;
|
||||
return OMP_TGT_FAIL;
|
||||
}
|
||||
auto DeviceOrErr = PM->getDevice(DeviceId);
|
||||
@ -491,8 +496,8 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
|
||||
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
|
||||
int64_t Size = MapperComponentsPtr->Components.size();
|
||||
DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
|
||||
DPxPTR(RtMapperHandle), Size);
|
||||
ODBG(ODT_Interface) << "__tgt_mapper_num_components(Handle=" << RtMapperHandle
|
||||
<< ") returns " << Size;
|
||||
return Size;
|
||||
}
|
||||
|
||||
@ -500,11 +505,11 @@ EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
|
||||
EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
|
||||
void *Begin, int64_t Size, int64_t Type,
|
||||
void *Name) {
|
||||
DP("__tgt_push_mapper_component(Handle=" DPxMOD
|
||||
") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s).\n",
|
||||
DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
|
||||
(Name) ? getNameFromMapping(Name).c_str() : "unknown");
|
||||
ODBG(ODT_Interface) << "__tgt_push_mapper_component(Handle=" << RtMapperHandle
|
||||
<< ") adds an entry (Base=" << Base << ", Begin=" << Begin
|
||||
<< ", Size=" << Size
|
||||
<< ", Type=" << llvm::format_hex(Type, 8) << ", Name="
|
||||
<< ((Name) ? getNameFromMapping(Name) : "unknown") << ")";
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
|
||||
MapperComponentsPtr->Components.push_back(
|
||||
MapComponentInfoTy(Base, Begin, Size, Type, Name));
|
||||
|
||||
@ -41,6 +41,7 @@ using llvm::SmallVector;
|
||||
#ifdef OMPT_SUPPORT
|
||||
using namespace llvm::omp::target::ompt;
|
||||
#endif
|
||||
using namespace llvm::omp::target::debug;
|
||||
|
||||
int AsyncInfoTy::synchronize() {
|
||||
int Result = OFFLOAD_SUCCESS;
|
||||
@ -200,10 +201,11 @@ static int32_t getParentIndex(int64_t Type) {
|
||||
|
||||
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
|
||||
const char *Name) {
|
||||
DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " for device " << DeviceNum
|
||||
<< " requesting " << Size << " bytes";
|
||||
|
||||
if (Size <= 0) {
|
||||
DP("Call to %s with non-positive length\n", Name);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " with non-positive length";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -211,7 +213,7 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
|
||||
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
Rc = malloc(Size);
|
||||
DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc));
|
||||
ODBG(ODT_Interface) << Name << " returns host ptr " << Rc;
|
||||
return Rc;
|
||||
}
|
||||
|
||||
@ -220,23 +222,23 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
|
||||
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
|
||||
|
||||
Rc = DeviceOrErr->allocData(Size, nullptr, Kind);
|
||||
DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc));
|
||||
ODBG(ODT_Interface) << Name << " returns device ptr " << Rc;
|
||||
return Rc;
|
||||
}
|
||||
|
||||
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
|
||||
const char *Name) {
|
||||
DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,
|
||||
DPxPTR(DevicePtr));
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " for device " << DeviceNum
|
||||
<< " and address " << DevicePtr;
|
||||
|
||||
if (!DevicePtr) {
|
||||
DP("Call to %s with NULL ptr\n", Name);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " with NULL ptr";
|
||||
return;
|
||||
}
|
||||
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
free(DevicePtr);
|
||||
DP("%s deallocated host ptr\n", Name);
|
||||
ODBG(ODT_Interface) << Name << " deallocated host ptr";
|
||||
return;
|
||||
}
|
||||
|
||||
@ -249,15 +251,16 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
|
||||
"Failed to deallocate device ptr. Set "
|
||||
"OFFLOAD_TRACK_ALLOCATION_TRACES=1 to track allocations.");
|
||||
|
||||
DP("omp_target_free deallocated device ptr\n");
|
||||
ODBG(ODT_Interface) << "omp_target_free deallocated device ptr";
|
||||
}
|
||||
|
||||
void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
|
||||
const char *Name) {
|
||||
DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " for device " << DeviceNum
|
||||
<< " locking " << Size << " bytes";
|
||||
|
||||
if (Size <= 0) {
|
||||
DP("Call to %s with non-positive length\n", Name);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " with non-positive length";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -270,22 +273,23 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
|
||||
int32_t Err = 0;
|
||||
Err = DeviceOrErr->RTL->data_lock(DeviceNum, HostPtr, Size, &RC);
|
||||
if (Err) {
|
||||
DP("Could not lock ptr %p\n", HostPtr);
|
||||
ODBG(ODT_Interface) << "Could not lock ptr " << HostPtr;
|
||||
return nullptr;
|
||||
}
|
||||
DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(RC));
|
||||
ODBG(ODT_Interface) << Name << " returns device ptr " << RC;
|
||||
return RC;
|
||||
}
|
||||
|
||||
void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) {
|
||||
DP("Call to %s for device %d unlocking\n", Name, DeviceNum);
|
||||
ODBG(ODT_Interface) << "Call to " << Name << " for device " << DeviceNum
|
||||
<< " unlocking";
|
||||
|
||||
auto DeviceOrErr = PM->getDevice(DeviceNum);
|
||||
if (!DeviceOrErr)
|
||||
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
|
||||
|
||||
DeviceOrErr->RTL->data_unlock(DeviceNum, HostPtr);
|
||||
DP("%s returns\n", Name);
|
||||
ODBG(ODT_Interface) << Name << " returns";
|
||||
}
|
||||
|
||||
/// Call the user-defined mapper function followed by the appropriate
|
||||
@ -295,7 +299,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
|
||||
void *ArgMapper, AsyncInfoTy &AsyncInfo,
|
||||
TargetDataFuncPtrTy TargetDataFunction,
|
||||
AttachInfoTy *AttachInfo = nullptr) {
|
||||
DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper));
|
||||
ODBG(ODT_Interface) << "Calling the mapper function " << ArgMapper;
|
||||
|
||||
// The mapper function fills up Components.
|
||||
MapperComponentsTy MapperComponents;
|
||||
@ -368,12 +372,11 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
|
||||
void *TgtPteeBase = reinterpret_cast<void *>(
|
||||
reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta);
|
||||
|
||||
DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD
|
||||
", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n",
|
||||
DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta);
|
||||
DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD
|
||||
"\n",
|
||||
DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
|
||||
ODBG(ODT_Mapping) << "HstPteeBase: " << HstPteeBase
|
||||
<< ", HstPteeBegin: " << HstPteeBegin
|
||||
<< ", Delta (HstPteeBegin - HstPteeBase): " << Delta << "\n"
|
||||
<< "TgtPteeBase (TgtPteeBegin - Delta): " << TgtPteeBase
|
||||
<< ", TgtPteeBegin: " << TgtPteeBegin;
|
||||
|
||||
return TgtPteeBase;
|
||||
}
|
||||
@ -453,18 +456,18 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
|
||||
// Add shadow pointer tracking
|
||||
if (!PtrTPR.getEntry()->addShadowPointer(
|
||||
ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize})) {
|
||||
DP("Pointer " DPxMOD " is already attached to " DPxMOD "\n",
|
||||
DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase));
|
||||
ODBG(ODT_Mapping) << "Pointer " << TgtPtrAddr << " is already attached to "
|
||||
<< TgtPteeBase;
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr),
|
||||
DPxPTR(TgtPteeBase));
|
||||
ODBG(ODT_Mapping) << "Update pointer (" << TgtPtrAddr << ") -> ["
|
||||
<< TgtPteeBase << "]\n";
|
||||
|
||||
// Lambda to handle submitData result and perform final steps.
|
||||
auto HandleSubmitResult = [&](int SubmitResult) -> int {
|
||||
if (SubmitResult != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to update pointer on device.\n");
|
||||
REPORT() << "Failed to update pointer on device.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -491,11 +494,11 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
|
||||
std::memcpy(SrcBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
|
||||
HstDescriptorFieldsSize);
|
||||
|
||||
DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD
|
||||
") (pointer + %" PRId64 " additional bytes from host descriptor " DPxMOD
|
||||
")\n",
|
||||
HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
|
||||
DPxPTR(HstDescriptorFieldsAddr));
|
||||
ODBG(ODT_Mapping) << "Updating " << HstPtrSize << " bytes of descriptor ("
|
||||
<< TgtPtrAddr << ") (pointer + "
|
||||
<< HstDescriptorFieldsSize
|
||||
<< " additional bytes from host descriptor "
|
||||
<< HstDescriptorFieldsAddr << ")";
|
||||
}
|
||||
|
||||
// Submit the populated source buffer to device.
|
||||
@ -524,7 +527,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// Instead of executing the regular path of targetDataBegin, call the
|
||||
// targetDataMapper variant which will call targetDataBegin again
|
||||
// with new arguments.
|
||||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
ODBG(ODT_Mapping) << "Calling targetDataMapper for the " << I
|
||||
<< "th argument";
|
||||
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
@ -532,8 +536,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
targetDataBegin, AttachInfo);
|
||||
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
REPORT() << "Call to targetDataBegin via targetDataMapper for custom "
|
||||
"mapper failed";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -561,7 +565,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
/*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
|
||||
/*PointeeName=*/HstPtrName);
|
||||
|
||||
DP("Deferring ATTACH map-type processing for argument %d\n", I);
|
||||
ODBG(ODT_Mapping) << "Deferring ATTACH map-type processing for argument "
|
||||
<< I;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -575,9 +580,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
|
||||
TgtPadding = (int64_t)HstPtrBegin % Alignment;
|
||||
if (TgtPadding) {
|
||||
DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
|
||||
"\n",
|
||||
TgtPadding, DPxPTR(HstPtrBegin));
|
||||
ODBG(ODT_Mapping) << "Using a padding of " << TgtPadding
|
||||
<< " bytes for begin address " << HstPtrBegin;
|
||||
}
|
||||
}
|
||||
|
||||
@ -602,7 +606,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
MappingInfoTy::HDTTMapAccessorTy HDTTMap =
|
||||
Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
|
||||
DP("Has a pointer entry: \n");
|
||||
ODBG(ODT_Mapping) << "Has a pointer entry";
|
||||
// Base is address of pointer.
|
||||
//
|
||||
// Usually, the pointer is already allocated by this time. For example:
|
||||
@ -625,9 +629,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
PointerTgtPtrBegin = PointerTpr.TargetPointer;
|
||||
IsHostPtr = PointerTpr.Flags.IsHostPointer;
|
||||
if (!PointerTgtPtrBegin) {
|
||||
REPORT("Call to getTargetPointer returned null pointer (%s).\n",
|
||||
HasPresentModifier ? "'present' map type modifier"
|
||||
: "device failure or illegal mapping");
|
||||
REPORT() << "Call to getTargetPointer returned null pointer ("
|
||||
<< (HasPresentModifier ? "'present' map type modifier"
|
||||
: "device failure or illegal mapping")
|
||||
<< ")";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -635,10 +640,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
if (PointerTpr.Flags.IsNewEntry && !IsHostPtr)
|
||||
AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *);
|
||||
|
||||
DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
|
||||
"\n",
|
||||
sizeof(void *), DPxPTR(PointerTgtPtrBegin),
|
||||
(PointerTpr.Flags.IsNewEntry ? "" : " not"));
|
||||
ODBG(ODT_Mapping) << "There are " << sizeof(void *)
|
||||
<< " bytes allocated at target address "
|
||||
<< PointerTgtPtrBegin << " - is"
|
||||
<< (PointerTpr.Flags.IsNewEntry ? "" : " not")
|
||||
<< " new";
|
||||
PointerHstPtrBegin = HstPtrBase;
|
||||
// modify current entry.
|
||||
HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
|
||||
@ -660,9 +666,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// If data_size==0, then the argument could be a zero-length pointer to
|
||||
// NULL, so getOrAlloc() returning NULL is not an error.
|
||||
if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
|
||||
REPORT("Call to getTargetPointer returned null pointer (%s).\n",
|
||||
HasPresentModifier ? "'present' map type modifier"
|
||||
: "device failure or illegal mapping");
|
||||
REPORT() << "Call to getTargetPointer returned null pointer ("
|
||||
<< (HasPresentModifier ? "'present' map type modifier"
|
||||
: "device failure or illegal mapping")
|
||||
<< ").";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -670,14 +677,15 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin)
|
||||
AttachInfo->NewAllocations[HstPtrBegin] = DataSize;
|
||||
|
||||
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
|
||||
" - is%s new\n",
|
||||
DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not"));
|
||||
ODBG(ODT_Mapping) << "There are " << DataSize
|
||||
<< " bytes allocated at target address " << TgtPtrBegin
|
||||
<< " - is" << (TPR.Flags.IsNewEntry ? "" : " not")
|
||||
<< " new";
|
||||
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
|
||||
uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
|
||||
void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
|
||||
DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
|
||||
ODBG(ODT_Mapping) << "Returning device pointer " << TgtPtrBase;
|
||||
ArgsBase[I] = TgtPtrBase;
|
||||
}
|
||||
|
||||
@ -755,19 +763,20 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
// Report all tracked allocations from both main loop and ATTACH processing
|
||||
if (!AttachInfo.NewAllocations.empty()) {
|
||||
DP("Tracked %u total new allocations:\n",
|
||||
(unsigned)AttachInfo.NewAllocations.size());
|
||||
for ([[maybe_unused]] const auto &Alloc : AttachInfo.NewAllocations) {
|
||||
DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n",
|
||||
DPxPTR(Alloc.first), Alloc.second);
|
||||
}
|
||||
ODBG_OS(ODT_Mapping, [&](llvm::raw_ostream &OS) {
|
||||
OS << "Tracked " << AttachInfo.NewAllocations.size()
|
||||
<< " total new allocations:";
|
||||
for (const auto &Alloc : AttachInfo.NewAllocations) {
|
||||
OS << " Host ptr: " << Alloc.first << ", Size: " << Alloc.second
|
||||
<< " bytes";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (AttachInfo.AttachEntries.empty())
|
||||
return OFFLOAD_SUCCESS;
|
||||
|
||||
DP("Processing %zu deferred ATTACH map entries\n",
|
||||
AttachInfo.AttachEntries.size());
|
||||
ODBG(ODT_Mapping) << "Processing " << AttachInfo.AttachEntries.size();
|
||||
|
||||
int Ret = OFFLOAD_SUCCESS;
|
||||
bool IsFirstPointerAttachment = true;
|
||||
@ -783,9 +792,11 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
int64_t PtrSize = AttachEntry.PointerSize;
|
||||
int64_t MapType = AttachEntry.MapType;
|
||||
|
||||
DP("Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD
|
||||
", Size=%" PRId64 ", Type=0x%" PRIx64 "\n",
|
||||
EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType);
|
||||
ODBG(ODT_Mapping) << "Processing ATTACH entry " << EntryIdx
|
||||
<< ": HstPtr=" << HstPtr
|
||||
<< ", HstPteeBegin=" << HstPteeBegin
|
||||
<< ", PtrSize=" << PtrSize << ", MapType=0x"
|
||||
<< llvm::utohexstr(MapType);
|
||||
|
||||
const bool IsAttachAlways = MapType & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
|
||||
@ -799,8 +810,9 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
Ptr < reinterpret_cast<void *>(
|
||||
reinterpret_cast<char *>(AllocPtr) + AllocSize);
|
||||
});
|
||||
DP("Attach %s " DPxMOD " was newly allocated: %s\n", PtrName, DPxPTR(Ptr),
|
||||
IsNewlyAllocated ? "yes" : "no");
|
||||
ODBG(ODT_Mapping) << "Attach " << PtrName << " " << Ptr
|
||||
<< " was newly allocated: "
|
||||
<< (IsNewlyAllocated ? "yes" : "no");
|
||||
return IsNewlyAllocated;
|
||||
};
|
||||
|
||||
@ -808,9 +820,9 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
// allocated, or the ALWAYS flag is set.
|
||||
if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") &&
|
||||
!WasNewlyAllocated(HstPtr, "pointer")) {
|
||||
DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
|
||||
"allocated and no ALWAYS flag\n",
|
||||
EntryIdx);
|
||||
ODBG(ODT_Mapping) << "Skipping ATTACH entry " << EntryIdx
|
||||
<< ": neither pointer nor pointee was newly "
|
||||
"allocated and no ALWAYS flag";
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -824,19 +836,19 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
Ptr, Size, /*UpdateRefCount=*/false,
|
||||
/*UseHoldRefCount=*/false, /*MustContain=*/true);
|
||||
|
||||
DP("Attach %s lookup - IsPresent=%s, IsHostPtr=%s\n", PtrType,
|
||||
TPR.isPresent() ? "yes" : "no",
|
||||
TPR.Flags.IsHostPointer ? "yes" : "no");
|
||||
ODBG(ODT_Mapping) << "Attach " << PtrType << " lookup - IsPresent="
|
||||
<< (TPR.isPresent() ? "yes" : "no") << ", IsHostPtr="
|
||||
<< (TPR.Flags.IsHostPointer ? "yes" : "no");
|
||||
|
||||
if (!TPR.isPresent()) {
|
||||
DP("Skipping ATTACH entry %zu: %s not present on device\n", EntryIdx,
|
||||
PtrType);
|
||||
ODBG(ODT_Mapping) << "Skipping ATTACH entry " << EntryIdx << ": "
|
||||
<< PtrType << " not present on device";
|
||||
return std::nullopt;
|
||||
}
|
||||
if (TPR.Flags.IsHostPointer) {
|
||||
DP("Skipping ATTACH entry %zu: device version of the %s is a host "
|
||||
"pointer.\n",
|
||||
EntryIdx, PtrType);
|
||||
ODBG(ODT_Mapping) << "Skipping ATTACH entry " << EntryIdx
|
||||
<< ": device version of the " << PtrType
|
||||
<< " is a host pointer.";
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
@ -865,10 +877,11 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
// Insert a data-fence before the first pointer-attachment.
|
||||
if (IsFirstPointerAttachment) {
|
||||
IsFirstPointerAttachment = false;
|
||||
DP("Inserting a data fence before the first pointer attachment.\n");
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Inserting a data fence before the first pointer attachment.";
|
||||
Ret = Device.dataFence(AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to insert data fence.\n");
|
||||
REPORT() << "Failed to insert data fence.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -881,7 +894,8 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
if (Ret != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
DP("ATTACH entry %zu processed successfully\n", EntryIdx);
|
||||
ODBG(ODT_Mapping) << "ATTACH entry " << EntryIdx
|
||||
<< " processed successfully";
|
||||
}
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
@ -966,16 +980,16 @@ postProcessingTargetDataEnd(DeviceTy *Device,
|
||||
Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) {
|
||||
constexpr int64_t VoidPtrSize = sizeof(void *);
|
||||
if (ShadowPtr.PtrSize > VoidPtrSize) {
|
||||
DP("Restoring host descriptor " DPxMOD
|
||||
" to its original content (%" PRId64
|
||||
" bytes), containing pointee address " DPxMOD "\n",
|
||||
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
|
||||
DPxPTR(ShadowPtr.HstPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring host descriptor " << (void *)ShadowPtr.HstPtrAddr
|
||||
<< " to its original content (" << ShadowPtr.PtrSize
|
||||
<< " bytes), containing pointee address "
|
||||
<< (void *)ShadowPtr.HstPtrContent.data();
|
||||
} else {
|
||||
DP("Restoring host pointer " DPxMOD " to its original value " DPxMOD
|
||||
"\n",
|
||||
DPxPTR(ShadowPtr.HstPtrAddr),
|
||||
DPxPTR(ShadowPtr.HstPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring host pointer " << (void *)ShadowPtr.HstPtrAddr
|
||||
<< " to its original value "
|
||||
<< (void *)ShadowPtr.HstPtrContent.data();
|
||||
}
|
||||
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
|
||||
ShadowPtr.PtrSize);
|
||||
@ -995,7 +1009,7 @@ postProcessingTargetDataEnd(DeviceTy *Device,
|
||||
HDTTMap.destroy();
|
||||
Ret |= Device->getMappingInfo().deallocTgtPtrAndEntry(Entry, DataSize);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Deallocating data from device failed.\n");
|
||||
REPORT() << "Deallocating data from device failed.";
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1024,7 +1038,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// directives. They may be encountered here while handling the "end" part of
|
||||
// "#pragma omp target".
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) {
|
||||
DP("Ignoring ATTACH entry %d in targetDataEnd\n", I);
|
||||
ODBG(ODT_Mapping) << "Ignoring ATTACH entry " << I << " in targetDataEnd";
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1032,7 +1046,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// Instead of executing the regular path of targetDataEnd, call the
|
||||
// targetDataMapper variant which will call targetDataEnd again
|
||||
// with new arguments.
|
||||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
ODBG(ODT_Mapping) << "Calling targetDataMapper for the " << I
|
||||
<< "th argument";
|
||||
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
|
||||
@ -1040,8 +1055,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
targetDataEnd);
|
||||
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataEnd via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
REPORT() << "Call to targetDataEnd via targetDataMapper for custom "
|
||||
"mapper failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1066,8 +1081,10 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void *TgtPtrBegin = TPR.TargetPointer;
|
||||
if (!TPR.isPresent() && !TPR.isHostPointer() &&
|
||||
(DataSize || HasPresentModifier)) {
|
||||
DP("Mapping does not exist (%s)\n",
|
||||
(HasPresentModifier ? "'present' map type modifier" : "ignored"));
|
||||
ODBG(ODT_Mapping) << "Mapping does not exist ("
|
||||
<< (HasPresentModifier ? "'present' map type modifier"
|
||||
: "ignored")
|
||||
<< ")";
|
||||
if (HasPresentModifier) {
|
||||
// OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:
|
||||
// "If a map clause appears on a target, target data, target enter data
|
||||
@ -1090,9 +1107,10 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
} else {
|
||||
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
|
||||
" - is%s last\n",
|
||||
DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not"));
|
||||
ODBG(ODT_Mapping) << "There are " << DataSize
|
||||
<< " bytes allocated at target address " << TgtPtrBegin
|
||||
<< " - is" << (TPR.Flags.IsLast ? "" : " not")
|
||||
<< " last";
|
||||
}
|
||||
|
||||
// OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16:
|
||||
@ -1108,14 +1126,15 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
|
||||
if (HasFrom && (HasAlways || TPR.Flags.IsLast) &&
|
||||
!TPR.Flags.IsHostPointer && DataSize != 0) {
|
||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
|
||||
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
|
||||
ODBG(ODT_Mapping) << "Moving " << DataSize
|
||||
<< " bytes (tgt:" << TgtPtrBegin
|
||||
<< ") -> (hst:" << HstPtrBegin << ")";
|
||||
TIMESCOPE_WITH_DETAILS_AND_IDENT(
|
||||
"DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc);
|
||||
// Wait for any previous transfer if an event is present.
|
||||
if (void *Event = TPR.getEntry()->getEvent()) {
|
||||
if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
|
||||
REPORT() << "Failed to wait for event " << Event << ".";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -1123,7 +1142,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
|
||||
TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data from device failed.\n");
|
||||
REPORT() << "Copying data from device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1163,7 +1182,8 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
/*UseHoldRefCount=*/false, /*MustContain=*/true);
|
||||
void *TgtPtrBegin = TPR.TargetPointer;
|
||||
if (!TPR.isPresent()) {
|
||||
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
|
||||
ODBG(ODT_Mapping) << "hst data:" << HstPtrBegin
|
||||
<< " not found, becomes a noop";
|
||||
if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
|
||||
MESSAGE("device mapping required by 'present' motion modifier does not "
|
||||
"exist for host address " DPxMOD " (%" PRId64 " bytes)",
|
||||
@ -1174,18 +1194,18 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
}
|
||||
|
||||
if (TPR.Flags.IsHostPointer) {
|
||||
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
|
||||
DPxPTR(HstPtrBegin));
|
||||
ODBG(ODT_Mapping) << "hst data:" << HstPtrBegin
|
||||
<< " unified and shared, becomes a noop";
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
if (ArgType & OMP_TGT_MAPTYPE_TO) {
|
||||
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
|
||||
ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
|
||||
ODBG(ODT_Mapping) << "Moving " << ArgSize << " bytes (hst:" << HstPtrBegin
|
||||
<< ") -> (tgt:" << TgtPtrBegin << ")";
|
||||
int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo,
|
||||
TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data to device failed.\n");
|
||||
REPORT() << "Copying data to device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
if (TPR.getEntry()) {
|
||||
@ -1193,40 +1213,40 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
[&](ShadowPtrInfoTy &ShadowPtr) {
|
||||
constexpr int64_t VoidPtrSize = sizeof(void *);
|
||||
if (ShadowPtr.PtrSize > VoidPtrSize) {
|
||||
DP("Restoring target descriptor " DPxMOD
|
||||
" to its original content (%" PRId64
|
||||
" bytes), containing pointee address " DPxMOD "\n",
|
||||
DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize,
|
||||
DPxPTR(ShadowPtr.TgtPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring target descriptor " << ShadowPtr.TgtPtrAddr
|
||||
<< " to its original content (" << ShadowPtr.PtrSize
|
||||
<< " bytes), containing pointee address "
|
||||
<< ShadowPtr.TgtPtrContent.data();
|
||||
} else {
|
||||
DP("Restoring target pointer " DPxMOD
|
||||
" to its original value " DPxMOD "\n",
|
||||
DPxPTR(ShadowPtr.TgtPtrAddr),
|
||||
DPxPTR(ShadowPtr.TgtPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring target pointer " << ShadowPtr.TgtPtrAddr
|
||||
<< " to its original value "
|
||||
<< ShadowPtr.TgtPtrContent.data();
|
||||
}
|
||||
Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
|
||||
ShadowPtr.TgtPtrContent.data(),
|
||||
ShadowPtr.PtrSize, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data to device failed.\n");
|
||||
REPORT() << "Copying data to device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
return OFFLOAD_SUCCESS;
|
||||
});
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Updating shadow map failed\n");
|
||||
ODBG(ODT_Mapping) << "Updating shadow map failed";
|
||||
return Ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ArgType & OMP_TGT_MAPTYPE_FROM) {
|
||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
|
||||
ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
|
||||
ODBG(ODT_Mapping) << "Moving " << ArgSize << " bytes (tgt:" << TgtPtrBegin
|
||||
<< ") -> (hst:" << HstPtrBegin << ")";
|
||||
int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo,
|
||||
TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data from device failed.\n");
|
||||
REPORT() << "Copying data from device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1238,16 +1258,16 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
[&](const ShadowPtrInfoTy &ShadowPtr) {
|
||||
constexpr int64_t VoidPtrSize = sizeof(void *);
|
||||
if (ShadowPtr.PtrSize > VoidPtrSize) {
|
||||
DP("Restoring host descriptor " DPxMOD
|
||||
" to its original content (%" PRId64
|
||||
" bytes), containing pointee address " DPxMOD "\n",
|
||||
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
|
||||
DPxPTR(ShadowPtr.HstPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring host descriptor " << ShadowPtr.HstPtrAddr
|
||||
<< " to its original content (" << ShadowPtr.PtrSize
|
||||
<< " bytes), containing pointee address "
|
||||
<< ShadowPtr.HstPtrContent.data();
|
||||
} else {
|
||||
DP("Restoring host pointer " DPxMOD
|
||||
" to its original value " DPxMOD "\n",
|
||||
DPxPTR(ShadowPtr.HstPtrAddr),
|
||||
DPxPTR(ShadowPtr.HstPtrContent.data()));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Restoring host pointer " << ShadowPtr.HstPtrAddr
|
||||
<< " to its original value "
|
||||
<< ShadowPtr.HstPtrContent.data();
|
||||
}
|
||||
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
|
||||
ShadowPtr.PtrSize);
|
||||
@ -1255,7 +1275,7 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
});
|
||||
Entry->unlock();
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Updating shadow map failed\n");
|
||||
ODBG(ODT_Mapping) << "Updating shadow map failed";
|
||||
return Ret;
|
||||
}
|
||||
return OFFLOAD_SUCCESS;
|
||||
@ -1291,9 +1311,8 @@ static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device,
|
||||
}
|
||||
} else {
|
||||
char *Ptr = (char *)ArgsBase + Offset;
|
||||
DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64
|
||||
" len %" PRIu64 "\n",
|
||||
DPxPTR(Ptr), Offset, Size);
|
||||
ODBG(ODT_Mapping) << "Transfer of non-contiguous : host ptr " << Ptr
|
||||
<< " offset " << Offset << " len " << Size;
|
||||
Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType,
|
||||
AsyncInfo);
|
||||
}
|
||||
@ -1326,16 +1345,16 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// Instead of executing the regular path of targetDataUpdate, call the
|
||||
// targetDataMapper variant which will call targetDataUpdate again
|
||||
// with new arguments.
|
||||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
|
||||
ODBG(ODT_Mapping) << "Calling targetDataMapper for the " << I
|
||||
<< "th argument";
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataUpdate);
|
||||
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
REPORT() << "Call to targetDataUpdate via targetDataMapper for custom "
|
||||
"mapper failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1470,8 +1489,9 @@ class PrivateArgumentManagerTy {
|
||||
// See if the pointee's begin address has corresponding storage on device.
|
||||
void *TgtPteeBegin = [&]() -> void * {
|
||||
if (!HstPteeBegin) {
|
||||
DP("Corresponding-pointer-initialization: pointee begin address is "
|
||||
"null\n");
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Corresponding-pointer-initialization: pointee begin address is "
|
||||
"null";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -1582,9 +1602,10 @@ class PrivateArgumentManagerTy {
|
||||
HstPteeBegin);
|
||||
|
||||
// Store the target pointee base address to the first VoidPtrSize bytes
|
||||
DP("Initializing corresponding-pointer-initialization source buffer "
|
||||
"for " DPxMOD ", with pointee base " DPxMOD "\n",
|
||||
DPxPTR(HstPtr), DPxPTR(TgtPteeBase));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Corresponding-pointer-initialization: setting target pointee base "
|
||||
"for "
|
||||
<< HstPtr << ", with pointee base " << TgtPteeBase;
|
||||
std::memcpy(Buffer, &TgtPteeBase, VoidPtrSize);
|
||||
if (HstPtrSize <= VoidPtrSize)
|
||||
return;
|
||||
@ -1592,10 +1613,10 @@ class PrivateArgumentManagerTy {
|
||||
// For Fortran descriptors, copy the remaining descriptor fields from host
|
||||
uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
|
||||
void *HstDescriptorFieldsAddr = static_cast<char *>(HstPtr) + VoidPtrSize;
|
||||
DP("Copying %" PRId64
|
||||
" bytes of descriptor fields into corresponding-pointer-initialization "
|
||||
"buffer at offset %" PRId64 ", from " DPxMOD "\n",
|
||||
HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr));
|
||||
ODBG(ODT_Mapping) << "Corresponding-pointer-initialization: copying "
|
||||
<< HstDescriptorFieldsSize
|
||||
<< " bytes of descriptor fields into buffer at offset "
|
||||
<< VoidPtrSize << ", from " << HstDescriptorFieldsAddr;
|
||||
std::memcpy(Buffer + VoidPtrSize, HstDescriptorFieldsAddr,
|
||||
HstDescriptorFieldsSize);
|
||||
}
|
||||
@ -1634,21 +1655,21 @@ public:
|
||||
AllocImmediately) {
|
||||
TgtPtr = Device.allocData(ArgSize, HstPtr);
|
||||
if (!TgtPtr) {
|
||||
DP("Data allocation for %sprivate array " DPxMOD " failed.\n",
|
||||
(IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr));
|
||||
ODBG(ODT_Alloc) << "Data allocation for "
|
||||
<< (IsFirstPrivate ? "first-" : "") << "private array "
|
||||
<< HstPtr << " failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
void *TgtPtrBase = (void *)((intptr_t)TgtPtr + ArgOffset);
|
||||
DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD
|
||||
" for %sprivate array " DPxMOD " - pushing target argument " DPxMOD
|
||||
"\n",
|
||||
ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""),
|
||||
DPxPTR(HstPtr), DPxPTR(TgtPtrBase));
|
||||
#endif
|
||||
|
||||
ODBG(ODT_Alloc) << "Allocated " << ArgSize
|
||||
<< " bytes of target memory at " << TgtPtr << " for "
|
||||
<< (IsFirstPrivate ? "first-" : "") << "private array "
|
||||
<< HstPtr << " - pushing target argument "
|
||||
<< (void *)((intptr_t)TgtPtr + ArgOffset);
|
||||
|
||||
// If first-private, copy data from host
|
||||
if (IsFirstPrivate) {
|
||||
DP("Submitting firstprivate data to the device.\n");
|
||||
ODBG(ODT_Mapping) << "Submitting firstprivate data to the device.";
|
||||
|
||||
// The source value used for corresponding-pointer-initialization
|
||||
// is different vs regular firstprivates.
|
||||
@ -1659,16 +1680,18 @@ public:
|
||||
: HstPtr;
|
||||
int Ret = Device.submitData(TgtPtr, DataSource, ArgSize, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Copying %s data to device failed.\n",
|
||||
IsCorrespondingPointerInit ? "corresponding-pointer-initialization"
|
||||
: "firstprivate");
|
||||
ODBG(ODT_Mapping) << "Copying "
|
||||
<< (IsCorrespondingPointerInit
|
||||
? "corresponding-pointer-initialization"
|
||||
: "firstprivate")
|
||||
<< " data to device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
TgtPtrs.push_back(TgtPtr);
|
||||
} else {
|
||||
DP("Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n",
|
||||
DPxPTR(HstPtr), ArgSize);
|
||||
ODBG(ODT_Mapping) << "Firstprivate array " << HstPtr << " of size "
|
||||
<< ArgSize << " will be packed";
|
||||
// When reach this point, the argument must meet all following
|
||||
// requirements:
|
||||
// 1. Its size does not exceed the threshold (see the comment for
|
||||
@ -1742,17 +1765,18 @@ public:
|
||||
void *TgtPtr =
|
||||
Device.allocData(FirstPrivateArgSize, FirstPrivateArgBuffer.data());
|
||||
if (TgtPtr == nullptr) {
|
||||
DP("Failed to allocate target memory for private arguments.\n");
|
||||
ODBG(ODT_Alloc)
|
||||
<< "Failed to allocate target memory for private arguments.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
TgtPtrs.push_back(TgtPtr);
|
||||
DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n",
|
||||
FirstPrivateArgSize, DPxPTR(TgtPtr));
|
||||
ODBG(ODT_Alloc) << "Allocated " << FirstPrivateArgSize
|
||||
<< " bytes of target memory at " << TgtPtr;
|
||||
// Transfer data to target device
|
||||
int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(),
|
||||
FirstPrivateArgSize, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Failed to submit data of private arguments.\n");
|
||||
ODBG(ODT_DataTransfer) << "Failed to submit data of private arguments.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
// Fill in all placeholder pointers
|
||||
@ -1764,10 +1788,9 @@ public:
|
||||
TP += Info.Padding;
|
||||
Ptr = reinterpret_cast<void *>(TP);
|
||||
TP += Info.Size;
|
||||
DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD
|
||||
"\n",
|
||||
DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,
|
||||
DPxPTR(Ptr));
|
||||
ODBG(ODT_Mapping) << "Firstprivate array " << Info.HstPtrBegin
|
||||
<< " of size " << (Info.HstPtrEnd - Info.HstPtrBegin)
|
||||
<< " mapped to " << Ptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1779,7 +1802,7 @@ public:
|
||||
for (void *P : TgtPtrs) {
|
||||
int Ret = Device.deleteData(P);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Deallocation of (first-)private arrays failed.\n");
|
||||
ODBG(ODT_Alloc) << "Deallocation of (first-)private arrays failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -1814,7 +1837,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
|
||||
&AttachInfo, false /*FromMapper=*/);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataBegin failed, abort target.\n");
|
||||
REPORT() << "Call to targetDataBegin failed, abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1822,7 +1845,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
if (!AttachInfo.AttachEntries.empty()) {
|
||||
Ret = processAttachEntries(*DeviceOrErr, AttachInfo, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to process ATTACH entries.\n");
|
||||
REPORT() << "Failed to process ATTACH entries.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -1847,7 +1870,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
void *HstPtrBase = Args[Idx];
|
||||
void *TgtPtrBase =
|
||||
(void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
|
||||
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
|
||||
ODBG(ODT_Mapping) << "Parent lambda base " << TgtPtrBase;
|
||||
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
|
||||
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
|
||||
void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
|
||||
@ -1857,23 +1880,24 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
/*UseHoldRefCount=*/false);
|
||||
PointerTgtPtrBegin = TPR.TargetPointer;
|
||||
if (!TPR.isPresent()) {
|
||||
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
|
||||
DPxPTR(HstPtrVal));
|
||||
ODBG(ODT_Mapping) << "No lambda captured variable mapped "
|
||||
<< HstPtrVal << " - ignored";
|
||||
continue;
|
||||
}
|
||||
if (TPR.Flags.IsHostPointer) {
|
||||
DP("Unified memory is active, no need to map lambda captured"
|
||||
"variable (" DPxMOD ")\n",
|
||||
DPxPTR(HstPtrVal));
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Unified memory is active, no need to map lambda captured"
|
||||
"variable ("
|
||||
<< HstPtrVal << ")";
|
||||
continue;
|
||||
}
|
||||
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
|
||||
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
|
||||
ODBG(ODT_Mapping) << "Update lambda reference (" << PointerTgtPtrBegin
|
||||
<< ") -> [" << TgtPtrBegin << "]";
|
||||
Ret =
|
||||
DeviceOrErr->submitData(TgtPtrBegin, &PointerTgtPtrBegin,
|
||||
sizeof(void *), AsyncInfo, TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data to device failed.\n");
|
||||
REPORT() << "Copying data to device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -1886,8 +1910,8 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
ptrdiff_t TgtBaseOffset;
|
||||
TargetPointerResultTy TPR;
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
|
||||
DP("Forwarding first-private value " DPxMOD " to the target construct\n",
|
||||
DPxPTR(HstPtrBase));
|
||||
ODBG(ODT_Mapping) << "Forwarding first-private value " << HstPtrBase
|
||||
<< " to the target construct";
|
||||
TgtPtrBegin = HstPtrBase;
|
||||
TgtBaseOffset = 0;
|
||||
} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
|
||||
@ -1936,9 +1960,10 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
/*TgtArgsIndex=*/TgtArgs.size(), HstPtrName, AllocImmediately,
|
||||
HstPteeBase, HstPteeBegin, /*IsCorrespondingPointerInit=*/IsAttach);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to process %s%sprivate argument " DPxMOD "\n",
|
||||
IsAttach ? "corresponding-pointer-initialization " : "",
|
||||
(IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
|
||||
REPORT() << "Failed to process "
|
||||
<< (IsAttach ? "corresponding-pointer-initialization " : "")
|
||||
<< (IsFirstPrivate ? "first-" : "") << "private argument "
|
||||
<< HstPtrBegin << ".";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
} else {
|
||||
@ -1950,11 +1975,9 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
/*UseHoldRefCount=*/false);
|
||||
TgtPtrBegin = TPR.TargetPointer;
|
||||
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
|
||||
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
|
||||
DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
|
||||
#endif
|
||||
ODBG(ODT_Mapping) << "Obtained target argument " << TgtPtrBase
|
||||
<< " from host pointer " << HstPtrBegin;
|
||||
}
|
||||
TgtArgsPositions[I] = TgtArgs.size();
|
||||
TgtArgs.push_back(TgtPtrBegin);
|
||||
@ -1967,7 +1990,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
// Pack and transfer first-private arguments
|
||||
Ret = PrivateArgumentManager.packAndTransfer(TgtArgs);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
DP("Failed to pack and transfer first private arguments\n");
|
||||
ODBG(ODT_Mapping) << "Failed to pack and transfer first private arguments";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -1991,7 +2014,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int Ret = targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataEnd failed, abort target.\n");
|
||||
REPORT() << "Call to targetDataEnd failed, abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -2003,7 +2026,7 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
std::move(PrivateArgumentManager)]() mutable -> int {
|
||||
int Ret = PrivateArgumentManager.free();
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to deallocate target memory for private args\n");
|
||||
REPORT() << "Failed to deallocate target memory for private args";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
return Ret;
|
||||
@ -2025,8 +2048,8 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
TableMap *TM = getTableMap(HostPtr);
|
||||
// No map for this host pointer found!
|
||||
if (!TM) {
|
||||
REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",
|
||||
DPxPTR(HostPtr));
|
||||
REPORT() << "Host ptr " << HostPtr
|
||||
<< " does not have a matching target pointer.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -2040,7 +2063,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
}
|
||||
assert(TargetTable && "Global data has not been mapped\n");
|
||||
|
||||
DP("loop trip count is %" PRIu64 ".\n", KernelArgs.Tripcount);
|
||||
ODBG(ODT_Kernel) << "loop trip count is " << KernelArgs.Tripcount;
|
||||
|
||||
// We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we
|
||||
// need to manifest base pointers prior to launching a kernel. Even if we have
|
||||
@ -2066,7 +2089,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
|
||||
TgtOffsets, PrivateArgumentManager, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to process data before launching the kernel.\n");
|
||||
REPORT() << "Failed to process data before launching the kernel.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -2079,9 +2102,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
|
||||
// Launch device execution.
|
||||
void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
|
||||
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
|
||||
TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr),
|
||||
TM->Index);
|
||||
ODBG(ODT_Kernel) << "Launching target execution "
|
||||
<< TargetTable->EntriesBegin[TM->Index].SymbolName
|
||||
<< " with pointer " << TgtEntryPtr << " (index=" << TM->Index
|
||||
<< ").";
|
||||
|
||||
{
|
||||
assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
|
||||
@ -2105,7 +2129,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
}
|
||||
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Executing target region abort target.\n");
|
||||
REPORT() << "Executing target region abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -2118,7 +2142,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
KernelArgs.ArgNames, KernelArgs.ArgMappers,
|
||||
PrivateArgumentManager, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to process data after launching the kernel.\n");
|
||||
REPORT() << "Failed to process data after launching the kernel.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
@ -2150,8 +2174,8 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
// Fail if the table map fails to find the target kernel pointer for the
|
||||
// provided host pointer.
|
||||
if (!TM) {
|
||||
REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",
|
||||
DPxPTR(HostPtr));
|
||||
REPORT() << "Host ptr " << HostPtr
|
||||
<< " does not have a matching target pointer.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@ -2168,9 +2192,10 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
// Retrieve the target kernel pointer, allocate and store the recorded device
|
||||
// memory data, and launch device execution.
|
||||
void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address;
|
||||
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
|
||||
TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr),
|
||||
TM->Index);
|
||||
ODBG(ODT_Kernel) << "Launching target execution "
|
||||
<< TargetTable->EntriesBegin[TM->Index].SymbolName
|
||||
<< " with pointer " << TgtEntryPtr << " (index=" << TM->Index
|
||||
<< ").";
|
||||
|
||||
void *TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr,
|
||||
TARGET_ALLOC_DEFAULT);
|
||||
@ -2187,7 +2212,7 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
|
||||
AsyncInfo);
|
||||
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Executing target region abort target.\n");
|
||||
REPORT() << "Executing target region abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user