
Implement the remaining GOMP_* functions to support task reductions in taskgroup, parallel, loop, and taskloop constructs. The unused mem argument to many of the work-sharing constructs has to do with the scan() directive / inscan() modifier. If mem is set, each function will call KMP_FATAL() and tell the user that scan/inscan is unsupported. The GOMP reduction implementation is kept separate from our implementation because of how GOMP presents reduction data and computes the reductions. GOMP expects the privatized copies to be present even after a #pragma omp parallel reduction(task:...) region has ended, so the data is stored inside GOMP's uintptr_t* data pseudo-structure. This style is tightly coupled with GCC compiler codegen. There also aren't any init(), combiner(), or fini() functions in GOMP's codegen, so the two implementations were too disparate to try to wrap GOMP's around our own. Differential Revision: https://reviews.llvm.org/D98806
72 lines
1.6 KiB
C
72 lines
1.6 KiB
C
// RUN: %libomp-compile-and-run
|
|
|
|
// Parsing error up to and including gcc 8:
|
|
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
|
|
|
|
// Parsing error before clang 11:
|
|
// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
|
|
|
|
// No icc compiler support yet
|
|
// XFAIL: icc
|
|
|
|
#include <stdio.h>
|
|
#include <omp.h>
|
|
|
|
// Global accumulator named in the reduction clauses of bar() and foo();
// main() zeroes it before the parallel region and compares it to `res` after.
int r;
|
|
|
|
/*
 * Compute the amount a single task adds to the reduction variable.
 *
 * k: first operand (bar() passes its loop/call index here).
 * l: second operand (bar() passes the task number, 0 or 1).
 *
 * Returns k + l + 1.
 */
int work(int k, int l)
{
  return k + l + 1;
}
|
|
void bar(int i) {
|
|
#pragma omp taskgroup task_reduction(+:r)
|
|
{ int th_gen = omp_get_thread_num();
|
|
#pragma omp task in_reduction(+:r) firstprivate(i, th_gen)
|
|
{
|
|
r += work(i, 0);
|
|
printf("executing task (%d, 0), th %d (gen by th %d)\n", i, omp_get_thread_num(), th_gen);
|
|
}
|
|
#pragma omp task in_reduction(+:r) firstprivate(i, th_gen)
|
|
{
|
|
r += work(i, 1);
|
|
printf("executing task (%d, 1), th %d (gen by th %d)\n", i, omp_get_thread_num(), th_gen);
|
|
}
|
|
}
|
|
}
|
|
int foo() {
|
|
int i;
|
|
int th_gen = omp_get_thread_num();
|
|
#pragma omp taskgroup task_reduction(+:r)
|
|
{
|
|
bar(0);
|
|
}
|
|
printf("th %d passed bar0\n", th_gen);
|
|
#pragma omp taskloop reduction(+:r) firstprivate(th_gen)
|
|
for (i = 1; i < 4; ++i) {
|
|
bar(i);
|
|
printf("th %d (gen by th %d) passed bar%d in taskloop\n", omp_get_thread_num(), th_gen, i);
|
|
#pragma omp task in_reduction(+:r)
|
|
r += i;
|
|
}
|
|
return 0;
|
|
}
|
|
// res = ((1+2)+(2+3)+(3+4)+(4+5)+1+2+3) = 30
|
|
#define res 30
|
|
int main()
|
|
{
|
|
r = 0;
|
|
#pragma omp parallel num_threads(2)
|
|
{ // barrier ensures threads have started before tasks creation
|
|
#pragma omp barrier
|
|
// single ensures no race condition between taskgroup reductions
|
|
#pragma omp single nowait
|
|
foo();
|
|
}
|
|
if (r == res) {
|
|
return 0;
|
|
} else {
|
|
printf("error r = %d (!= %d)\n", r, res);
|
|
return 1;
|
|
}
|
|
}
|