
Add a `REQUIRES: unified_shared_memory` option to tests that use `#pragma omp requires unified_shared_memory`. For CUDA, the feature tag is derived from LIBOMPTARGET_DEP_CUDA_ARCH which itself is derived using [[ https://cmake.org/cmake/help/latest/module/FindCUDA.html#commands | cuda_select_nvcc_arch_flags ]]. The latter determines which compute capability the GPU in the system supports. To ensure that this is the CUDA arch being used, we could also set the `-Xopenmp-target -march=` flag. In the absence of an NVIDIA GPU, LIBOMPTARGET_DEP_CUDA_ARCH will be 35. That is, in that case we are assuming unified_shared_memory is not available. CUDA plugin testing could be disabled entirely in this case, but this currently depends on `LIBOMPTARGET_CAN_LINK_LIBCUDA OR LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA`, not on whether the hardware is actually available. For all other targets, nothing changes and we are assuming unified shared memory is available. This might need refinement if that is not the case. This tries to fix the [[ http://meinersbur.de:8011/#/builders/143 | OpenMP Offloading Buildbot ]] that, although brand-new, only has a Pascal-generation (sm_61) GPU installed. Hence, tests that require unified shared memory are currently failing. I wish I had known in advance. Reviewed By: protze.joachim, tianshilei1992 Differential Revision: https://reviews.llvm.org/D101498
137 lines
3.4 KiB
C
137 lines
3.4 KiB
C
// RUN: %libomptarget-compile-run-and-check-generic
|
|
|
|
// REQUIRES: unified_shared_memory
|
|
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
|
|
|
|
// amdgcn does not have printf definition
|
|
// XFAIL: amdgcn-amd-amdhsa
|
|
|
|
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
|
|
|
|
// Declare to the OpenMP implementation that this program requires unified
// shared memory.
#pragma omp requires unified_shared_memory
|
|
|
|
// Number of int elements in each test array (the heap buffer `alloc` and the
// stack array `data` in main).
#define N 1024
|
|
|
|
int main(int argc, char *argv[]) {
|
|
int fails;
|
|
void *host_alloc, *device_alloc;
|
|
void *host_data, *device_data;
|
|
int *alloc = (int *)malloc(N * sizeof(int));
|
|
int data[N];
|
|
|
|
for (int i = 0; i < N; ++i) {
|
|
alloc[i] = 10;
|
|
data[i] = 1;
|
|
}
|
|
|
|
host_data = &data[0];
|
|
host_alloc = &alloc[0];
|
|
|
|
//
|
|
// Test that updates on the device are not visible to host
|
|
// when only a TO mapping is used.
|
|
//
|
|
#pragma omp target map(tofrom \
|
|
: device_data, device_alloc) map(close, to \
|
|
: alloc[:N], data \
|
|
[:N])
|
|
{
|
|
device_data = &data[0];
|
|
device_alloc = &alloc[0];
|
|
|
|
for (int i = 0; i < N; i++) {
|
|
alloc[i] += 1;
|
|
data[i] += 1;
|
|
}
|
|
}
|
|
|
|
// CHECK: Address of alloc on device different from host address.
|
|
if (device_alloc != host_alloc)
|
|
printf("Address of alloc on device different from host address.\n");
|
|
|
|
// CHECK: Address of data on device different from host address.
|
|
if (device_data != host_data)
|
|
printf("Address of data on device different from host address.\n");
|
|
|
|
// On the host, check that the arrays have been updated.
|
|
// CHECK: Alloc host values not updated: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (alloc[i] != 10)
|
|
fails++;
|
|
}
|
|
printf("Alloc host values not updated: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
|
|
// CHECK: Data host values not updated: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (data[i] != 1)
|
|
fails++;
|
|
}
|
|
printf("Data host values not updated: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
|
|
//
|
|
// Test that updates on the device are visible on host
|
|
// when a from is used.
|
|
//
|
|
|
|
for (int i = 0; i < N; i++) {
|
|
alloc[i] += 1;
|
|
data[i] += 1;
|
|
}
|
|
|
|
#pragma omp target map(close, tofrom : alloc[:N], data[:N])
|
|
{
|
|
// CHECK: Alloc device values are correct: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (alloc[i] != 11)
|
|
fails++;
|
|
}
|
|
printf("Alloc device values are correct: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
// CHECK: Data device values are correct: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (data[i] != 2)
|
|
fails++;
|
|
}
|
|
printf("Data device values are correct: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
|
|
// Update values on the device
|
|
for (int i = 0; i < N; i++) {
|
|
alloc[i] += 1;
|
|
data[i] += 1;
|
|
}
|
|
}
|
|
|
|
// CHECK: Alloc host values updated: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (alloc[i] != 12)
|
|
fails++;
|
|
}
|
|
printf("Alloc host values updated: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
|
|
// CHECK: Data host values updated: Succeeded
|
|
fails = 0;
|
|
for (int i = 0; i < N; i++) {
|
|
if (data[i] != 3)
|
|
fails++;
|
|
}
|
|
printf("Data host values updated: %s\n",
|
|
(fails == 0) ? "Succeeded" : "Failed");
|
|
|
|
free(alloc);
|
|
|
|
// CHECK: Done!
|
|
printf("Done!\n");
|
|
|
|
return 0;
|
|
}
|