llvm-project/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
Peixin Qiao aa41460311 [flang][OpenMP] Handle the data race for firstprivate and lastprivate
Remove barriers for firstprivate except if the variable is also
lastprivatized. Re-arrange code and put all last-privates inside a
single scf.if.

According to the OpenMP 5.0 specification, to avoid data races, concurrent updates of the
original list item must be synchronized with the read of the original
list item that occurs as a result of the firstprivate clause. Adding
barrier(s) before and/or after the worksharing region would remove the
data races, and doing so is the application's (user's) job. However, when
one list item is in both firstprivate and lastprivate clauses, the
standard (https://www.openmp.org/spec-html/5.0/openmpsu105.html) states
the following:
```
If a list item appears in both firstprivate and lastprivate clauses, the
update required for the lastprivate clause occurs after all
initializations for the firstprivate clause.
```

So, the synchronization should be ensured by the compiler, such as by emitting
one barrier, since the lastprivate clause follows the reads of the
original list item performed for the initialization of each of the
firstprivate list items.

Add FIXME for two special cases, sections construct and linear clause.

The data race problem for single construct will be handled later.

This implementation is based on the discussion with OpenMP committee and
clang code (clang/lib/CodeGen/CGStmtOpenMP.cpp).

Reviewed By: kiranchandramohan, NimishMishra

Differential Revision: https://reviews.llvm.org/D131832
2022-08-20 23:31:13 +08:00

278 lines
14 KiB
Fortran

! This test checks lowering of OpenMP DO Directive (Worksharing).
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
! CHECK-LABEL: func @_QPsimple_parallel_do()
subroutine simple_parallel_do
! Smallest case: a combined PARALLEL DO with no clauses over i = 1..9 whose
! body prints the loop index. The FileCheck directives interleaved below
! expect an omp.parallel region containing an omp.wsloop with constant
! bounds 1 to 9 (inclusive) and step 1.
integer :: i
! CHECK: omp.parallel
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
!$OMP PARALLEL DO
do i=1, 9
! Expected loop body: the wsloop induction value is stored to memory,
! reloaded, and the loaded value is handed to the runtime integer output call.
! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
print*, i
end do
! Expected region terminators: omp.yield first, then omp.terminator.
! CHECK: omp.yield
! CHECK: omp.terminator
!$OMP END PARALLEL DO
end subroutine
! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
subroutine parallel_do_with_parallel_clauses(cond, nt)
! Combined PARALLEL DO carrying IF(cond), NUM_THREADS(nt) and
! PROC_BIND(close). The expectations below require all three clauses to
! appear on the omp.parallel operation, while the omp.wsloop itself is
! matched without any clause.
!   cond - logical dummy argument used as the IF clause condition
!   nt   - integer dummy argument used as the NUM_THREADS value
logical :: cond
integer :: nt
integer :: i
! Expected clause operands: cond is loaded and converted from
! !fir.logical<4> to i1; nt is loaded as an i32.
! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
!$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
do i=1, 9
! Expected loop body: store the induction value, reload it, print it.
! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
print*, i
end do
! CHECK: omp.yield
! CHECK: omp.terminator
!$OMP END PARALLEL DO
end subroutine
! CHECK-LABEL: func @_QPparallel_do_with_clauses
! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
subroutine parallel_do_with_clauses(nt)
! Combined PARALLEL DO carrying one clause for each half of the construct:
! NUM_THREADS(nt) and SCHEDULE(dynamic). The expectations below require
! num_threads on the omp.parallel operation and schedule(dynamic) on the
! omp.wsloop operation.
!   nt - integer dummy argument used as the NUM_THREADS value
integer :: nt
integer :: i
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
! CHECK: omp.parallel num_threads(%[[NT]] : i32)
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
!$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
do i=1, 9
! Expected loop body: store the induction value, reload it, print it.
! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
print*, i
end do
! CHECK: omp.yield
! CHECK: omp.terminator
!$OMP END PARALLEL DO
end subroutine
!===============================================================================
! Checking for the following construct:
! !$omp parallel do private(...) firstprivate(...)
!===============================================================================
! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
subroutine parallel_do_with_privatisation_clauses(cond,nt)
! Combined PARALLEL DO with PRIVATE(cond) and FIRSTPRIVATE(nt). The
! expectations below require a pinned fir.alloca for each of cond and nt
! inside the omp.parallel region, and a load of the original nt followed by
! a store into its private copy. Only nt has such an initialising
! load/store pair checked. The loop body prints i, cond and nt.
!   cond - logical dummy argument, privatised
!   nt   - integer dummy argument, firstprivatised
logical :: cond
integer :: nt
integer :: i
! CHECK: omp.parallel
! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
!$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
do i=1, 9
! Expected loop body: the values printed for cond and nt are loaded from
! the private allocas captured above, not from the dummy arguments.
! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) : (!fir.ref<i8>, i32) -> i1
! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
print*, i, cond, nt
end do
! CHECK: omp.yield
! CHECK: omp.terminator
!$OMP END PARALLEL DO
end subroutine
!===============================================================================
! Checking for the following construct
! !$omp parallel private(...) firstprivate(...)
! !$omp do
!===============================================================================
subroutine parallel_private_do(cond,nt)
! Separate PARALLEL and DO constructs, with the data-sharing clauses
! PRIVATE(cond) and FIRSTPRIVATE(nt) placed on the PARALLEL directive; the
! nested DO directive carries no clause. Each of the nine iterations passes
! i, cond and nt to foo, which is not defined in the visible part of this
! file. The FileCheck expectations for this routine follow its END statement.
!   cond - logical dummy argument, privatised on the parallel region
!   nt   - integer dummy argument, firstprivatised on the parallel region
logical :: cond
integer :: nt
integer :: i
!$OMP PARALLEL PRIVATE(cond) FIRSTPRIVATE(nt)
!$OMP DO
do i=1, 9
call foo(i, cond, nt)
end do
!$OMP END DO
!$OMP END PARALLEL
end subroutine parallel_private_do
! CHECK-LABEL: func.func @_QPparallel_private_do(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"},
! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
! CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_private_doEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"}
! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
! CHECK: fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref<i32>
! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) : (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! CHECK: return
! CHECK: }
!===============================================================================
! Checking for the following construct
! !$omp parallel firstprivate(...) firstprivate(...)
! !$omp do
!===============================================================================
subroutine omp_parallel_multiple_firstprivate_do(a, b)
! Separate PARALLEL and DO constructs, with two distinct FIRSTPRIVATE
! clauses (one for a, one for b) placed on the PARALLEL directive; the
! nested DO directive carries no clause. The loop runs i = 1..10 and passes
! i and a to bar; b is firstprivatised but not referenced in the loop body.
! The loop variable i has no explicit declaration in this routine. bar is
! not defined in the visible part of this file. The FileCheck expectations
! for this routine follow its END statement.
!   a, b - integer dummy arguments, each firstprivatised
integer::a, b
!$OMP PARALLEL FIRSTPRIVATE(a) FIRSTPRIVATE(b)
!$OMP DO
do i=1, 10
call bar(i, a)
end do
!$OMP END DO
!$OMP END PARALLEL
end subroutine omp_parallel_multiple_firstprivate_do
! CHECK-LABEL: func.func @_QPomp_parallel_multiple_firstprivate_do(
! CHECK-SAME: %[[A_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "a"},
! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"}
! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"}
! CHECK: %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! CHECK: return
! CHECK: }
!===============================================================================
! Checking for the following construct
! !$omp parallel
! !$omp do private(...) firstprivate(...)
!===============================================================================
subroutine parallel_do_private(cond,nt)
! Separate PARALLEL and DO constructs, with the data-sharing clauses
! PRIVATE(cond) and FIRSTPRIVATE(nt) placed on the DO directive; the
! enclosing PARALLEL directive carries no clause. Each of the nine
! iterations passes i, cond and nt to foo, which is not defined in the
! visible part of this file. The FileCheck expectations for this routine
! follow its END statement.
!   cond - logical dummy argument, privatised on the worksharing loop
!   nt   - integer dummy argument, firstprivatised on the worksharing loop
logical :: cond
integer :: nt
integer :: i
!$OMP PARALLEL
!$OMP DO PRIVATE(cond) FIRSTPRIVATE(nt)
do i=1, 9
call foo(i, cond, nt)
end do
!$OMP END DO
!$OMP END PARALLEL
end subroutine parallel_do_private
! CHECK-LABEL: func.func @_QPparallel_do_private(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"},
! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"}
! CHECK: omp.parallel {
! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"}
! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
! CHECK: fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) : (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! CHECK: return
! CHECK: }
!===============================================================================
! Checking for the following construct
! !$omp parallel
! !$omp do firstprivate(...) firstprivate(...)
!===============================================================================
subroutine omp_parallel_do_multiple_firstprivate(a, b)
! Separate PARALLEL and DO constructs, with two distinct FIRSTPRIVATE
! clauses (one for a, one for b) placed on the DO directive; the enclosing
! PARALLEL directive carries no clause. The loop runs i = 1..10 and passes
! i and a to bar; b is firstprivatised but not referenced in the loop body.
! The loop variable i has no explicit declaration in this routine. bar is
! not defined in the visible part of this file. The FileCheck expectations
! for this routine follow its END statement.
!   a, b - integer dummy arguments, each firstprivatised
integer::a, b
!$OMP PARALLEL
!$OMP DO FIRSTPRIVATE(a) FIRSTPRIVATE(b)
do i=1, 10
call bar(i, a)
end do
!$OMP END DO
!$OMP END PARALLEL
end subroutine omp_parallel_do_multiple_firstprivate
! CHECK-LABEL: func.func @_QPomp_parallel_do_multiple_firstprivate(
! CHECK-SAME: %[[A_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "a"},
! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"}
! CHECK: omp.parallel {
! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"}
! CHECK: %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! CHECK: return
! CHECK: }