[flang] Introduce hlfir.elemental lowerings to omp.workshare_loop_nest (#104748)
This patch adds parallelization support for the following kind of expression in OpenMP workshare constructs: elemental procedures in array expressions. (Reapplied with a linking fix.)
This commit is contained in:
parent
cbc7802233
commit
5d38e6e42a
@ -26,6 +26,7 @@
|
|||||||
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
||||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||||
#include "flang/Optimizer/HLFIR/Passes.h"
|
#include "flang/Optimizer/HLFIR/Passes.h"
|
||||||
|
#include "flang/Optimizer/OpenMP/Passes.h"
|
||||||
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
||||||
#include "mlir/IR/Dominance.h"
|
#include "mlir/IR/Dominance.h"
|
||||||
#include "mlir/IR/PatternMatch.h"
|
#include "mlir/IR/PatternMatch.h"
|
||||||
@ -792,7 +793,8 @@ struct ElementalOpConversion
|
|||||||
// Generate a loop nest looping around the hlfir.elemental shape and clone
|
// Generate a loop nest looping around the hlfir.elemental shape and clone
|
||||||
// hlfir.elemental region inside the inner loop.
|
// hlfir.elemental region inside the inner loop.
|
||||||
hlfir::LoopNest loopNest =
|
hlfir::LoopNest loopNest =
|
||||||
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
|
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
|
||||||
|
flangomp::shouldUseWorkshareLowering(elemental));
|
||||||
auto insPt = builder.saveInsertionPoint();
|
auto insPt = builder.saveInsertionPoint();
|
||||||
builder.setInsertionPointToStart(loopNest.body);
|
builder.setInsertionPointToStart(loopNest.body);
|
||||||
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
|
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
|
||||||
|
@ -24,6 +24,7 @@ add_flang_library(HLFIRTransforms
|
|||||||
FIRDialectSupport
|
FIRDialectSupport
|
||||||
FIRSupport
|
FIRSupport
|
||||||
FIRTransforms
|
FIRTransforms
|
||||||
|
FlangOpenMPTransforms
|
||||||
HLFIRDialect
|
HLFIRDialect
|
||||||
MLIRIR
|
MLIRIR
|
||||||
${dialect_libs}
|
${dialect_libs}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
||||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||||
#include "flang/Optimizer/HLFIR/Passes.h"
|
#include "flang/Optimizer/HLFIR/Passes.h"
|
||||||
|
#include "flang/Optimizer/OpenMP/Passes.h"
|
||||||
#include "flang/Optimizer/Transforms/Utils.h"
|
#include "flang/Optimizer/Transforms/Utils.h"
|
||||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||||
#include "mlir/IR/Dominance.h"
|
#include "mlir/IR/Dominance.h"
|
||||||
@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
|
|||||||
// Generate a loop nest looping around the hlfir.elemental shape and clone
|
// Generate a loop nest looping around the hlfir.elemental shape and clone
|
||||||
// hlfir.elemental region inside the inner loop
|
// hlfir.elemental region inside the inner loop
|
||||||
hlfir::LoopNest loopNest =
|
hlfir::LoopNest loopNest =
|
||||||
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
|
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
|
||||||
|
flangomp::shouldUseWorkshareLowering(elemental));
|
||||||
builder.setInsertionPointToStart(loopNest.body);
|
builder.setInsertionPointToStart(loopNest.body);
|
||||||
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
|
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
|
||||||
loopNest.oneBasedIndices);
|
loopNest.oneBasedIndices);
|
||||||
@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
|
|||||||
llvm::SmallVector<mlir::Value> extents =
|
llvm::SmallVector<mlir::Value> extents =
|
||||||
hlfir::getIndexExtents(loc, builder, shape);
|
hlfir::getIndexExtents(loc, builder, shape);
|
||||||
hlfir::LoopNest loopNest =
|
hlfir::LoopNest loopNest =
|
||||||
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
|
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
|
||||||
|
flangomp::shouldUseWorkshareLowering(assign));
|
||||||
builder.setInsertionPointToStart(loopNest.body);
|
builder.setInsertionPointToStart(loopNest.body);
|
||||||
auto arrayElement =
|
auto arrayElement =
|
||||||
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
|
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
|
||||||
@ -651,7 +654,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
|
|||||||
llvm::SmallVector<mlir::Value> extents =
|
llvm::SmallVector<mlir::Value> extents =
|
||||||
hlfir::getIndexExtents(loc, builder, shape);
|
hlfir::getIndexExtents(loc, builder, shape);
|
||||||
hlfir::LoopNest loopNest =
|
hlfir::LoopNest loopNest =
|
||||||
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
|
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
|
||||||
|
flangomp::shouldUseWorkshareLowering(assign));
|
||||||
builder.setInsertionPointToStart(loopNest.body);
|
builder.setInsertionPointToStart(loopNest.body);
|
||||||
auto rhsArrayElement =
|
auto rhsArrayElement =
|
||||||
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
|
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
|
||||||
|
57
flang/test/HLFIR/bufferize-workshare.fir
Normal file
57
flang/test/HLFIR/bufferize-workshare.fir
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
|
||||||
|
|
||||||
|
// CHECK-LABEL: func.func @simple(
|
||||||
|
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
|
||||||
|
// CHECK: omp.parallel {
|
||||||
|
// CHECK: omp.workshare {
|
||||||
|
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
|
||||||
|
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
|
||||||
|
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
|
||||||
|
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
|
||||||
|
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
|
||||||
|
// CHECK: %[[VAL_7:.*]] = arith.constant true
|
||||||
|
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||||
|
// CHECK: omp.workshare.loop_wrapper {
|
||||||
|
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
|
||||||
|
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
|
||||||
|
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
|
||||||
|
// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
|
||||||
|
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
|
||||||
|
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
|
||||||
|
// CHECK: omp.yield
|
||||||
|
// CHECK: }
|
||||||
|
// CHECK: }
|
||||||
|
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
|
||||||
|
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
|
||||||
|
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
|
||||||
|
// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
|
||||||
|
// CHECK: omp.terminator
|
||||||
|
// CHECK: }
|
||||||
|
// CHECK: omp.terminator
|
||||||
|
// CHECK: }
|
||||||
|
// CHECK: return
|
||||||
|
// CHECK: }
|
||||||
|
func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
|
||||||
|
omp.parallel {
|
||||||
|
omp.workshare {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
%ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
|
||||||
|
%val = fir.load %ref : !fir.ref<i32>
|
||||||
|
%sub = arith.subi %val, %c1_i32 : i32
|
||||||
|
hlfir.yield_element %sub : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
@ -0,0 +1,34 @@
|
|||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
! This directory can be used to add Integration tests involving multiple
|
||||||
|
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
|
||||||
|
! contain executable tests. We should only add tests here sparingly and only
|
||||||
|
! if there is no other way to test. Repeat this message in each test that is
|
||||||
|
! added to this directory and sub-directories.
|
||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
|
||||||
|
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
|
||||||
|
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
|
||||||
|
|
||||||
|
subroutine sb1(x, y)
|
||||||
|
integer :: x(:)
|
||||||
|
integer :: y(:)
|
||||||
|
!$omp parallel workshare
|
||||||
|
x = y
|
||||||
|
!$omp end parallel workshare
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! HLFIR: omp.parallel {
|
||||||
|
! HLFIR: omp.workshare {
|
||||||
|
! HLFIR: hlfir.assign
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
|
||||||
|
! FIR: omp.parallel {
|
||||||
|
! FIR: omp.wsloop nowait {
|
||||||
|
! FIR: omp.loop_nest
|
||||||
|
! FIR: }
|
||||||
|
! FIR: omp.barrier
|
||||||
|
! FIR: omp.terminator
|
||||||
|
! FIR: }
|
57
flang/test/Integration/OpenMP/workshare-axpy.f90
Normal file
57
flang/test/Integration/OpenMP/workshare-axpy.f90
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
! This directory can be used to add Integration tests involving multiple
|
||||||
|
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
|
||||||
|
! contain executable tests. We should only add tests here sparingly and only
|
||||||
|
! if there is no other way to test. Repeat this message in each test that is
|
||||||
|
! added to this directory and sub-directories.
|
||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
|
||||||
|
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
|
||||||
|
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
|
||||||
|
|
||||||
|
subroutine sb1(a, x, y, z)
|
||||||
|
integer :: a
|
||||||
|
integer :: x(:)
|
||||||
|
integer :: y(:)
|
||||||
|
integer :: z(:)
|
||||||
|
!$omp parallel workshare
|
||||||
|
z = a * x + y
|
||||||
|
!$omp end parallel workshare
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! HLFIR: func.func @_QPsb1
|
||||||
|
! HLFIR: omp.parallel {
|
||||||
|
! HLFIR: omp.workshare {
|
||||||
|
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
|
||||||
|
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
|
||||||
|
! HLFIR: hlfir.assign
|
||||||
|
! HLFIR: hlfir.destroy
|
||||||
|
! HLFIR: hlfir.destroy
|
||||||
|
! HLFIR-NOT: omp.barrier
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
! HLFIR-NOT: omp.barrier
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
! HLFIR: return
|
||||||
|
! HLFIR: }
|
||||||
|
! HLFIR:}
|
||||||
|
|
||||||
|
|
||||||
|
! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
|
||||||
|
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
|
||||||
|
|
||||||
|
! FIR: func.func @_QPsb1
|
||||||
|
! FIR: omp.parallel {
|
||||||
|
! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
|
||||||
|
! FIR: fir.allocmem
|
||||||
|
! FIR: omp.wsloop {
|
||||||
|
! FIR: omp.loop_nest
|
||||||
|
! FIR: omp.single nowait {
|
||||||
|
! FIR: fir.call @_FortranAAssign
|
||||||
|
! FIR: fir.freemem
|
||||||
|
! FIR: omp.terminator
|
||||||
|
! FIR: }
|
||||||
|
! FIR: omp.barrier
|
||||||
|
! FIR: omp.terminator
|
||||||
|
! FIR: }
|
@ -0,0 +1,45 @@
|
|||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
! This directory can be used to add Integration tests involving multiple
|
||||||
|
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
|
||||||
|
! contain executable tests. We should only add tests here sparingly and only
|
||||||
|
! if there is no other way to test. Repeat this message in each test that is
|
||||||
|
! added to this directory and sub-directories.
|
||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
|
||||||
|
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
|
||||||
|
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
|
||||||
|
|
||||||
|
subroutine sb1(a, x)
|
||||||
|
integer :: a
|
||||||
|
integer :: x(:)
|
||||||
|
!$omp parallel workshare
|
||||||
|
x = a
|
||||||
|
!$omp end parallel workshare
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
! HLFIR: omp.parallel {
|
||||||
|
! HLFIR: omp.workshare {
|
||||||
|
! HLFIR: %[[SCALAR:.*]] = fir.load %1#0 : !fir.ref<i32>
|
||||||
|
! HLFIR: hlfir.assign %[[SCALAR]] to
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
! HLFIR: omp.terminator
|
||||||
|
! HLFIR: }
|
||||||
|
|
||||||
|
! FIR: omp.parallel {
|
||||||
|
! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
|
||||||
|
! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
|
||||||
|
! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
|
||||||
|
! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
|
||||||
|
! FIR: omp.terminator
|
||||||
|
! FIR: }
|
||||||
|
! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
|
||||||
|
! FIR: %6:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
|
||||||
|
! FIR: omp.wsloop nowait {
|
||||||
|
! FIR: omp.loop_nest (%arg2) : index = (%c1) to (%6#1) inclusive step (%c1) {
|
||||||
|
! FIR: fir.store %[[SCALAR_RELOAD]]
|
||||||
|
! FIR: omp.yield
|
||||||
|
! FIR: }
|
||||||
|
! FIR: }
|
||||||
|
! FIR: omp.barrier
|
||||||
|
! FIR: omp.terminator
|
65
flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
Normal file
65
flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
! This directory can be used to add Integration tests involving multiple
|
||||||
|
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
|
||||||
|
! contain executable tests. We should only add tests here sparingly and only
|
||||||
|
! if there is no other way to test. Repeat this message in each test that is
|
||||||
|
! added to this directory and sub-directories.
|
||||||
|
!===----------------------------------------------------------------------===!
|
||||||
|
|
||||||
|
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
|
||||||
|
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3
|
||||||
|
|
||||||
|
!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
|
||||||
|
!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0
|
||||||
|
|
||||||
|
program test
|
||||||
|
real :: arr_01(10)
|
||||||
|
!$omp parallel workshare
|
||||||
|
arr_01 = arr_01*2
|
||||||
|
!$omp end parallel workshare
|
||||||
|
end program
|
||||||
|
|
||||||
|
! HLFIR-O3: omp.parallel {
|
||||||
|
! HLFIR-O3: omp.workshare {
|
||||||
|
! HLFIR-O3: hlfir.elemental
|
||||||
|
! HLFIR-O3: hlfir.assign
|
||||||
|
! HLFIR-O3: hlfir.destroy
|
||||||
|
! HLFIR-O3: omp.terminator
|
||||||
|
! HLFIR-O3: omp.terminator
|
||||||
|
|
||||||
|
! FIR-O3: omp.parallel {
|
||||||
|
! FIR-O3: omp.wsloop nowait {
|
||||||
|
! FIR-O3: omp.loop_nest
|
||||||
|
! FIR-O3: omp.barrier
|
||||||
|
! FIR-O3: omp.terminator
|
||||||
|
|
||||||
|
! HLFIR-O0: omp.parallel {
|
||||||
|
! HLFIR-O0: omp.workshare {
|
||||||
|
! HLFIR-O0: hlfir.elemental
|
||||||
|
! HLFIR-O0: hlfir.assign
|
||||||
|
! HLFIR-O0: hlfir.destroy
|
||||||
|
! HLFIR-O0: omp.terminator
|
||||||
|
! HLFIR-O0: omp.terminator
|
||||||
|
|
||||||
|
! Check the copyprivate copy function
|
||||||
|
! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
|
||||||
|
! FIR-O0: fir.load %[[SRC]]
|
||||||
|
! FIR-O0: fir.store {{.*}} to %[[DST]]
|
||||||
|
|
||||||
|
! Check that we properly handle the temporary array
|
||||||
|
! FIR-O0: omp.parallel {
|
||||||
|
! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
|
||||||
|
! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
|
||||||
|
! FIR-O0: fir.allocmem
|
||||||
|
! FIR-O0: fir.store
|
||||||
|
! FIR-O0: omp.terminator
|
||||||
|
! FIR-O0: fir.load %[[CP]]
|
||||||
|
! FIR-O0: omp.wsloop {
|
||||||
|
! FIR-O0: omp.loop_nest
|
||||||
|
! FIR-O0: omp.yield
|
||||||
|
! FIR-O0: omp.single nowait {
|
||||||
|
! FIR-O0: fir.call @_FortranAAssign
|
||||||
|
! FIR-O0: fir.freemem
|
||||||
|
! FIR-O0: omp.terminator
|
||||||
|
! FIR-O0: omp.barrier
|
||||||
|
! FIR-O0: omp.terminator
|
162
flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
Normal file
162
flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
|
||||||
|
|
||||||
|
// Checks that we correctly identify when to use the lowering to
|
||||||
|
// omp.workshare.loop_wrapper
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_parallelize_0
|
||||||
|
// CHECK: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_parallelize_1
|
||||||
|
// CHECK: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.parallel {
|
||||||
|
omp.workshare {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_not_parallelize_0
|
||||||
|
// CHECK-NOT: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
omp.single {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_not_parallelize_1
|
||||||
|
// CHECK-NOT: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
omp.critical {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_not_parallelize_2
|
||||||
|
// CHECK-NOT: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
omp.parallel {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_not_parallelize_3
|
||||||
|
// CHECK-NOT: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
omp.parallel {
|
||||||
|
omp.workshare {
|
||||||
|
omp.parallel {
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @should_not_parallelize_4
|
||||||
|
// CHECK-NOT: omp.workshare.loop_wrapper
|
||||||
|
func.func @should_not_parallelize_4(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
|
||||||
|
omp.workshare {
|
||||||
|
^bb1:
|
||||||
|
%c42 = arith.constant 42 : index
|
||||||
|
%c1_i32 = arith.constant 1 : i32
|
||||||
|
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
|
||||||
|
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
|
||||||
|
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
|
||||||
|
^bb0(%i: index):
|
||||||
|
hlfir.yield_element %c1_i32 : i32
|
||||||
|
}
|
||||||
|
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
|
||||||
|
hlfir.destroy %elemental : !hlfir.expr<42xi32>
|
||||||
|
cf.br ^bb2
|
||||||
|
^bb2:
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user