Setting the prescriptiveness of the num_threads clause to 'strict' and having a corresponding check (with message and severity clauses) does not align well with how OpenMP should be handled for GPUs. The num_threads expression may be an arbitrary integer expression which is evaluated on the target, in accordance with the OpenMP specification. This prevents the check from being done before launching the kernel, especially considering that the num_threads clause is associated with the parallel directive and that there may be multiple parallel directives with different num_threads clauses in a single target region. Acting on the result of the 'strict' check on the GPU would require doing I/O on the GPU, which can introduce performance regressions. Delaying any actions resulting from the 'strict' check and doing them on the host after executing the target region involves additional data copies and is not really semantically correct. For now, the 'strict' modifier for the num_threads clause and its associated message and severity clauses are set to be unsupported on GPUs. Targets other than GPUs still support the aforementioned features in the context of an OpenMP target region.
109 lines
3.2 KiB
C++
109 lines
3.2 KiB
C++
// RUN: %clang_cc1 -DF1 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
// RUN: %clang_cc1 -DF1 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
// RUN: %clang_cc1 -DF2 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
// RUN: %clang_cc1 -DF2 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
// RUN: %clang_cc1 -DF3 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
// RUN: %clang_cc1 -DF3 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
|
|
|
|
// Compilations that do not define TARGET (the host RUN lines) must produce no
// diagnostics at all. The directive is kept inside the conditional so that it
// is skipped by the preprocessor when TARGET is defined.
#ifndef TARGET
// expected-no-diagnostics
#endif
|
|
|
|
#ifdef F3
|
|
// Function template containing two `parallel` regions whose num_threads
// clause carries the 'strict' modifier. bar() instantiates it as
// ftemplate<int> inside a `target` region when F3 is defined.
//
// In compilations that define TARGET, each pragma below must produce the
// "modifier 'strict' is currently not supported on a GPU" warning. Each
// warning directive uses a relative offset of two lines, so nothing may be
// inserted between a directive and the pragma it refers to.
//
// `n` is unused; the function returns `a`, which starts at 0 and has `b`
// added to it in the second region.
template<typename tx>
tx ftemplate(int n) {
  tx a = 0;

  // Dependent thread-count expression, with both severity and message clauses.
#ifdef TARGET
  // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
  #pragma omp parallel num_threads(strict: tx(20)) severity(fatal) message("msg")
  {
  }

  // Thread count taken from a local variable of type short.
  short b = 1;
#ifdef TARGET
  // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
  #pragma omp parallel num_threads(strict: b) severity(warning) message("msg")
  {
    a += b;
  }

  return a;
}
|
|
#endif
|
|
|
|
#ifdef F2
|
|
// Static function containing two combined `target parallel` constructs whose
// num_threads clause carries the 'strict' modifier. bar() calls it when F2 is
// defined.
//
// In compilations that define TARGET, each pragma below must produce the
// "modifier 'strict' is currently not supported on a GPU" warning. Each
// warning directive uses a relative offset of two lines, so nothing may be
// inserted between a directive and the pragma it refers to.
//
// Returns n + 1.
static
int fstatic(int n) {

  // Thread count is the parameter itself; message clause only.
#ifdef TARGET
  // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
  #pragma omp target parallel num_threads(strict: n) message("msg")
  {
  }

  // Thread count is an arithmetic expression; severity clause only.
#ifdef TARGET
  // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
  #pragma omp target parallel num_threads(strict: 32+n) severity(warning)
  {
  }

  return n+1;
}
|
|
#endif
|
|
|
|
#ifdef F1
|
|
// Struct whose member function r1 contains two `parallel` regions with a
// 'strict' num_threads clause. bar() calls S1::r1 from inside a `target`
// region when F1 is defined.
//
// In compilations that define TARGET, each pragma in r1 must produce the
// "modifier 'strict' is currently not supported on a GPU" warning. Each
// warning directive uses a relative offset of two lines, so nothing may be
// inserted between a directive and the pragma it refers to.
struct S1 {
  double a;

  // Writes `a` in both regions and returns `a` converted to int.
  int r1(int n){
    int b = 1;

    // Thread count mixes a parameter and a local; severity and message clauses.
#ifdef TARGET
    // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
    #pragma omp parallel num_threads(strict: n-b) severity(warning) message("msg")
    {
      this->a = (double)b + 1.5;
    }

    // Constant thread count; severity clause only.
#ifdef TARGET
    // expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
#endif
    #pragma omp parallel num_threads(strict: 1024) severity(fatal)
    {
      this->a = 2.5;
    }

    return (int)a;
  }
};
|
|
#endif
|
|
|
|
// Driver that reaches the function selected by the F1/F2/F3 macro of the
// current RUN line and adds its result to `a`:
//   F1 - S1::r1, called from inside a `target` region
//   F2 - fstatic, called directly (it contains its own `target parallel`)
//   F3 - ftemplate<int>, called from inside a `target` region
// With none of the macros defined the function simply returns 0.
int bar(int n){
  int a = 0;

#ifdef F1
  #pragma omp target
  {
    S1 S;
    a += S.r1(n);
  }
#endif

#ifdef F2
  a += fstatic(n);
#endif

#ifdef F3
  #pragma omp target
  a += ftemplate<int>(n);
#endif

  return a;
}
|