Remove barriers for firstprivate except when the variable is also lastprivate. Re-arrange the code and put all lastprivate updates inside a single scf.if. According to the OpenMP 5.0 specification, to avoid data races, concurrent updates of the original list item must be synchronized with the read of the original list item that occurs as a result of the firstprivate clause. Adding barrier(s) before and/or after the worksharing region would remove the data races, and doing so is the responsibility of the application (user). However, when one list item appears in both firstprivate and lastprivate clauses, the standard (https://www.openmp.org/spec-html/5.0/openmpsu105.html) states the following: ``` If a list item appears in both firstprivate and lastprivate clauses, the update required for the lastprivate clause occurs after all initializations for the firstprivate clause. ``` So, in that case the synchronization should be ensured by the compiler, for example by emitting one barrier, since the lastprivate update follows the reads of the original list item performed for the initialization of each of the firstprivate list items. Add FIXMEs for two special cases: the sections construct and the linear clause. The data race problem for the single construct will be handled later. This implementation is based on the discussion with the OpenMP committee and on the clang code (clang/lib/CodeGen/CGStmtOpenMP.cpp). Reviewed By: kiranchandramohan, NimishMishra Differential Revision: https://reviews.llvm.org/D131832
62 lines
2.6 KiB
Fortran
62 lines
2.6 KiB
Fortran
! This test checks lowering of OpenMP parallel DO, with the loop bound being
|
|
! a firstprivate variable
|
|
|
|
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
|
|
|
|
! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"})
|
|
subroutine omp_do_firstprivate(a)
! Lowering test case: a parallel DO whose upper loop bound is the dummy
! argument `a`, which is listed in the firstprivate clause. The FileCheck
! directives interleaved with the code describe the FIR expected for the
! directive, the loop and the calls.
! Note: `i` has no declaration in this subroutine, so it is implicitly typed;
! the expected IR treats the induction variable as i32.
|
|
integer::a
|
|
integer::n
|
|
! `n` is assigned here but is not referenced again in this subroutine.
n = a+1
|
|
!$omp parallel do firstprivate(a)
|
|
! Expected privatization sequence inside the parallel region: allocate a
! private copy named "a", load the original argument, and store that value
! into the private copy.
! CHECK: omp.parallel {
|
|
! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
|
|
! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
|
|
! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
|
! Expected loop bounds: constant lower bound 1, upper bound loaded from the
! private copy (not from the original argument), constant step 1.
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
|
|
! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
|
|
! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
|
|
! Expected loop body: the induction value is stored to a temporary that is
! passed by reference to foo together with the private copy of `a`.
! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref<i32>
|
|
! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
|
|
! CHECK-NEXT: omp.yield
|
|
do i=1, a
|
|
call foo(i, a)
|
|
end do
|
|
!$omp end parallel do
|
|
! After the parallel region, bar is expected to receive the original
! argument rather than the private copy.
!CHECK: fir.call @_QPbar(%[[ARG0]]) : (!fir.ref<i32>) -> ()
|
|
call bar(a)
|
|
end subroutine omp_do_firstprivate
|
|
|
|
! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"})
|
|
subroutine omp_do_firstprivate2(a, n)
! Lowering test case: a parallel DO whose lower and upper loop bounds are the
! dummy arguments `a` and `n`, both listed in the firstprivate clause. The
! FileCheck directives interleaved with the code describe the FIR expected
! for the directive, the loop and the calls.
! Note: `i` has no declaration in this subroutine, so it is implicitly typed;
! the expected IR treats the induction variable as i32.
|
|
integer::a
|
|
integer::n
|
|
! The dummy argument `n` is overwritten before the parallel region begins.
n = a+1
|
|
!$omp parallel do firstprivate(a, n)
|
|
! Expected privatization sequence inside the parallel region, in clause
! order: for `a` and then for `n`, allocate a private copy, load the original
! argument, and store that value into the private copy.
! CHECK: omp.parallel {
|
|
! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
|
|
! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
|
|
! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned
|
|
! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
|
|
! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
|
|
|
|
|
! Expected loop bounds: both the lower and the upper bound are loaded from
! the private copies (not from the original arguments); the step is the
! constant 1.
! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
|
|
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
|
|
! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
|
|
! Expected loop body: the induction value is stored to a temporary that is
! passed by reference to foo together with the private copy of `a`.
! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref<i32>
|
|
! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
|
|
! CHECK-NEXT: omp.yield
|
|
do i= a, n
|
|
call foo(i, a)
|
|
end do
|
|
!$omp end parallel do
|
|
! After the parallel region, bar is expected to receive the original `n`
! argument rather than its private copy.
!CHECK: fir.call @_QPbar(%[[ARG1]]) : (!fir.ref<i32>) -> ()
|
|
call bar(n)
|
|
end subroutine omp_do_firstprivate2
|