Michael Kruse 34ed3e6337 [OpenMP] Test unified shared memory tests only on systems that support it.
Add a `REQUIRES: unified_shared_memory` option to tests that use `#pragma omp requires unified_shared_memory`.

For CUDA, the feature tag is derived from LIBOMPTARGET_DEP_CUDA_ARCH which itself is derived using [[ https://cmake.org/cmake/help/latest/module/FindCUDA.html#commands | cuda_select_nvcc_arch_flags ]]. The latter determines which compute capability the GPU in the system supports. To ensure that this is the CUDA arch being used, we could also set the `-Xopenmp-target -march=` flag.
In the absence of an NVIDIA GPU, LIBOMPTARGET_DEP_CUDA_ARCH will be 35. That is, in that case we are assuming unified_shared_memory is not available. CUDA plugin testing could be disabled entirely in this case, but this currently depends on `LIBOMPTARGET_CAN_LINK_LIBCUDA OR LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA`, not on whether the hardware is actually available.

For all other targets, nothing changes: we continue to assume that unified shared memory is available. This may need refinement if that assumption turns out not to hold.

This tries to fix the [[ http://meinersbur.de:8011/#/builders/143 | OpenMP Offloading Buildbot ]] that, although brand-new, only has a Pascal-generation (sm_61) GPU installed. Hence, tests that require unified shared memory are currently failing. I wish I had known in advance.

Reviewed By: protze.joachim, tianshilei1992

Differential Revision: https://reviews.llvm.org/D101498
2021-05-13 11:08:04 -05:00

137 lines
3.4 KiB
C

// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: unified_shared_memory
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
// amdgcn does not provide a definition of printf, so this test is expected to fail there.
// XFAIL: amdgcn-amd-amdhsa
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#pragma omp requires unified_shared_memory
#define N 1024
/*
 * Regression test for the OpenMP `close` map-type modifier in a program that
 * declares `requires unified_shared_memory`.
 *
 * Phase 1 maps a heap array and a stack array with map(close, to) and expects
 * that the addresses seen on the device differ from the host addresses and
 * that writes performed on the device are not visible on the host afterwards.
 *
 * Phase 2 maps the same arrays with map(close, tofrom) and expects the device
 * to see the current host values and the host to see the device's writes.
 *
 * Every result is reported on stdout; the expected lines are listed in the
 * verification comments next to the code that prints them.
 *
 * Returns 0 once all result lines have been printed, or 1 if the heap array
 * could not be allocated.
 */
int main(int argc, char *argv[]) {
  int fails;
  void *host_alloc;
  void *host_data;
  // Start from a defined value so that the tofrom mapping below never copies
  // an indeterminate pointer to the device.
  void *device_alloc = NULL;
  void *device_data = NULL;

  int *alloc = (int *)malloc(N * sizeof(*alloc));
  if (alloc == NULL) {
    // Diagnostics go to stderr so that they do not mix with the verified
    // stdout stream.
    fprintf(stderr, "Failed to allocate %d ints.\n", N);
    return 1;
  }
  int data[N];

  for (int i = 0; i < N; ++i) {
    alloc[i] = 10;
    data[i] = 1;
  }

  host_data = &data[0];
  host_alloc = &alloc[0];

  //
  // Test that updates on the device are not visible to host
  // when only a TO mapping is used.
  //
#pragma omp target map(tofrom : device_data, device_alloc) \
    map(close, to : alloc[:N], data[:N])
  {
    // Record the addresses the device actually uses for both arrays.
    device_data = &data[0];
    device_alloc = &alloc[0];

    for (int i = 0; i < N; i++) {
      alloc[i] += 1;
      data[i] += 1;
    }
  }

  // CHECK: Address of alloc on device different from host address.
  if (device_alloc != host_alloc)
    printf("Address of alloc on device different from host address.\n");

  // CHECK: Address of data on device different from host address.
  if (device_data != host_data)
    printf("Address of data on device different from host address.\n");

  // On the host, verify that the arrays have NOT been updated: the mapping
  // was to-only, so the initial values must still be in place.
  // CHECK: Alloc host values not updated: Succeeded
  fails = 0;
  for (int i = 0; i < N; i++) {
    if (alloc[i] != 10)
      fails++;
  }
  printf("Alloc host values not updated: %s\n",
         (fails == 0) ? "Succeeded" : "Failed");

  // CHECK: Data host values not updated: Succeeded
  fails = 0;
  for (int i = 0; i < N; i++) {
    if (data[i] != 1)
      fails++;
  }
  printf("Data host values not updated: %s\n",
         (fails == 0) ? "Succeeded" : "Failed");

  //
  // Test that updates on the device are visible on host
  // when a from is used.
  //
  // Bump the host copies first (alloc: 10 -> 11, data: 1 -> 2) so the device
  // can confirm it received the current host values.
  for (int i = 0; i < N; i++) {
    alloc[i] += 1;
    data[i] += 1;
  }

#pragma omp target map(close, tofrom : alloc[:N], data[:N])
  {
    // CHECK: Alloc device values are correct: Succeeded
    fails = 0;
    for (int i = 0; i < N; i++) {
      if (alloc[i] != 11)
        fails++;
    }
    printf("Alloc device values are correct: %s\n",
           (fails == 0) ? "Succeeded" : "Failed");

    // CHECK: Data device values are correct: Succeeded
    fails = 0;
    for (int i = 0; i < N; i++) {
      if (data[i] != 2)
        fails++;
    }
    printf("Data device values are correct: %s\n",
           (fails == 0) ? "Succeeded" : "Failed");

    // Update values on the device
    for (int i = 0; i < N; i++) {
      alloc[i] += 1;
      data[i] += 1;
    }
  }

  // Back on the host, the device-side increments must now be visible
  // (alloc: 11 -> 12, data: 2 -> 3).
  // CHECK: Alloc host values updated: Succeeded
  fails = 0;
  for (int i = 0; i < N; i++) {
    if (alloc[i] != 12)
      fails++;
  }
  printf("Alloc host values updated: %s\n",
         (fails == 0) ? "Succeeded" : "Failed");

  // CHECK: Data host values updated: Succeeded
  fails = 0;
  for (int i = 0; i < N; i++) {
    if (data[i] != 3)
      fails++;
  }
  printf("Data host values updated: %s\n",
         (fails == 0) ? "Succeeded" : "Failed");

  free(alloc);

  // CHECK: Done!
  printf("Done!\n");

  return 0;
}