[OpenMP][OMPT] Fix target enter data callback ordering & reported device num

This patch fixes: https://github.com/llvm/llvm-project/issues/64738
We observed multiple issues, primarily that the `DeviceId` was reported as -1
in certain scenarios. The reason for this is simply that the device is not
initialized at that point. Hence, we need to move the RAII object creation just
after the `checkDeviceAndCtors`, closer to the actual call we want to observe.

This also solves an odering issue where one `target enter data` callback would
be executed before the `Init` callback.
Additionally, this change will also fix that the callbacks corresponding to
`enter / exit data` and `update` in conjunction with `nowait` would not result
in the emission of an OMPT callback.

Added a testcase to cover initialized device number and `omp target` constructs.

Reviewed By: dhruvachak

Differential Revision: https://reviews.llvm.org/D157605

(cherry picked from commit 57f0bdc8fb1e66d4ed9cfb57f1ef699eefd99646)
This commit is contained in:
Michael Halkenhaeuser 2023-08-22 12:40:55 -04:00 committed by Tobias Hieta
parent 4d5feafb9d
commit 1d54dc2f75
2 changed files with 147 additions and 20 deletions

View File

@ -108,6 +108,21 @@ targetDataMapper(ident_t *Loc, int64_t DeviceId, int32_t ArgNum,
TargetAsyncInfoTy TargetAsyncInfo(Device);
AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
/// RAII to establish tool anchors before and after data begin / end / update
OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
TargetDataFunction == targetDataEnd ||
TargetDataFunction == targetDataUpdate) &&
"Encountered unexpected TargetDataFunction during "
"execution of targetDataMapper");
auto CallbackFunctions =
(TargetDataFunction == targetDataBegin)
? RegionInterface.getCallbacks<ompt_target_enter_data>()
: (TargetDataFunction == targetDataEnd)
? RegionInterface.getCallbacks<ompt_target_exit_data>()
: RegionInterface.getCallbacks<ompt_target_update>();
InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
OMPT_GET_RETURN_ADDRESS(0));)
int Rc = OFFLOAD_SUCCESS;
Rc = TargetDataFunction(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
@ -129,12 +144,6 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data begin
OMPT_IF_BUILT(InterfaceRAII TargetDataEnterRAII(
RegionInterface.getCallbacks<ompt_target_enter_data>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
targetDataMapper<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataBegin,
"Entering OpenMP data region", "begin");
@ -161,12 +170,6 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data end
OMPT_IF_BUILT(InterfaceRAII TargetDataExitRAII(
RegionInterface.getCallbacks<ompt_target_exit_data>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
targetDataMapper<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataEnd,
"Exiting OpenMP data region", "end");
@ -190,12 +193,6 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data update
OMPT_IF_BUILT(InterfaceRAII TargetDataUpdateRAII(
RegionInterface.getCallbacks<ompt_target_update>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
targetDataMapper<AsyncInfoTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataUpdate, "Updating OpenMP data", "update");
@ -295,7 +292,8 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
DeviceTy &Device = *PM->Devices[DeviceId];
TargetAsyncInfoTy TargetAsyncInfo(Device);
AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
@ -363,7 +361,8 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
return OMP_TGT_FAIL;
}
DeviceTy &Device = *PM->Devices[DeviceId];
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

View File

@ -0,0 +1,128 @@
// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: ompt
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
/*
* Example OpenMP program that registers EMI callbacks.
* Explicitly testing for an initialized device num and
* #pragma omp target [data enter / data exit / update]
* The latter with the addition of a nowait clause.
*/
#include <omp.h>
#include <stdio.h>
#include "callbacks.h"
#include "register_emi.h"
#define N 100000
#pragma omp declare target
int c[N];
#pragma omp end declare target
int main() {
int a[N];
int b[N];
int i;
for (i = 0; i < N; i++)
a[i] = 0;
for (i = 0; i < N; i++)
b[i] = i;
for (i = 0; i < N; i++)
c[i] = 0;
#pragma omp target enter data map(to : a)
#pragma omp target parallel for
{
for (int j = 0; j < N; j++)
a[j] = b[j];
}
#pragma omp target exit data map(from : a)
#pragma omp target parallel for map(alloc : c)
{
for (int j = 0; j < N; j++)
c[j] = 2 * j + 1;
}
#pragma omp target update from(c) nowait
#pragma omp barrier
int rc = 0;
for (i = 0; i < N; i++) {
if (a[i] != i) {
rc++;
printf("Wrong value: a[%d]=%d\n", i, a[i]);
}
}
for (i = 0; i < N; i++) {
if (c[i] != 2 * i + 1) {
rc++;
printf("Wrong value: c[%d]=%d\n", i, c[i]);
}
}
if (!rc)
printf("Success\n");
return rc;
}
/// CHECK-NOT: Callback Target EMI:
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Init:
/// CHECK: Callback Load:
/// CHECK: Callback Target EMI: kind=2 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Target EMI: kind=2 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=3 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
/// CHECK: Callback Target EMI: kind=3 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=4 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback Target EMI: kind=4 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Fini: