[flang][fir] Lower do concurrent
loop nests to fir.do_concurrent
(#137928)
Adds support for lowering `do concurrent` nests from PFT to the new `fir.do_concurrent` MLIR op as well as its special terminator `fir.do_concurrent.loop` which models the actual loop nest. To that end, this PR emits the allocations for the iteration variables within the block of the `fir.do_concurrent` op and creates a region for the `fir.do_concurrent.loop` op that accepts arguments equal in number to the number of the input `do concurrent` iteration ranges. For example, given the following input: ```fortran do concurrent(i=1:10, j=11:20) end do ``` the changes in this PR emit the following MLIR: ```mlir fir.do_concurrent { %22 = fir.alloca i32 {bindc_name = "i"} %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) %24 = fir.alloca i32 {bindc_name = "j"} %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { %26 = fir.convert %arg1 : (index) -> i32 fir.store %26 to %23#0 : !fir.ref<i32> %27 = fir.convert %arg2 : (index) -> i32 fir.store %27 to %25#0 : !fir.ref<i32> } } ```
This commit is contained in:
parent
c3ce5684a8
commit
2fb288d4b8
@ -94,10 +94,11 @@ struct IncrementLoopInfo {
|
||||
template <typename T>
|
||||
explicit IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower,
|
||||
const T &upper, const std::optional<T> &step,
|
||||
bool isUnordered = false)
|
||||
bool isConcurrent = false)
|
||||
: loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)},
|
||||
upperExpr{Fortran::semantics::GetExpr(upper)},
|
||||
stepExpr{Fortran::semantics::GetExpr(step)}, isUnordered{isUnordered} {}
|
||||
stepExpr{Fortran::semantics::GetExpr(step)},
|
||||
isConcurrent{isConcurrent} {}
|
||||
|
||||
IncrementLoopInfo(IncrementLoopInfo &&) = default;
|
||||
IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default;
|
||||
@ -120,7 +121,7 @@ struct IncrementLoopInfo {
|
||||
const Fortran::lower::SomeExpr *upperExpr;
|
||||
const Fortran::lower::SomeExpr *stepExpr;
|
||||
const Fortran::lower::SomeExpr *maskExpr = nullptr;
|
||||
bool isUnordered; // do concurrent, forall
|
||||
bool isConcurrent;
|
||||
llvm::SmallVector<const Fortran::semantics::Symbol *> localSymList;
|
||||
llvm::SmallVector<const Fortran::semantics::Symbol *> localInitSymList;
|
||||
llvm::SmallVector<
|
||||
@ -130,7 +131,7 @@ struct IncrementLoopInfo {
|
||||
mlir::Value loopVariable = nullptr;
|
||||
|
||||
// Data members for structured loops.
|
||||
fir::DoLoopOp doLoop = nullptr;
|
||||
mlir::Operation *loopOp = nullptr;
|
||||
|
||||
// Data members for unstructured loops.
|
||||
bool hasRealControl = false;
|
||||
@ -1981,7 +1982,7 @@ private:
|
||||
llvm_unreachable("illegal reduction operator");
|
||||
}
|
||||
|
||||
/// Collect DO CONCURRENT or FORALL loop control information.
|
||||
/// Collect DO CONCURRENT loop control information.
|
||||
IncrementLoopNestInfo getConcurrentControl(
|
||||
const Fortran::parser::ConcurrentHeader &header,
|
||||
const std::list<Fortran::parser::LocalitySpec> &localityList = {}) {
|
||||
@ -2292,8 +2293,14 @@ private:
|
||||
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
|
||||
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
|
||||
/*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
|
||||
if (has_attrs)
|
||||
info.doLoop.setLoopAnnotationAttr(la);
|
||||
if (has_attrs) {
|
||||
if (auto loopOp = mlir::dyn_cast<fir::DoLoopOp>(info.loopOp))
|
||||
loopOp.setLoopAnnotationAttr(la);
|
||||
|
||||
if (auto doConcurrentOp =
|
||||
mlir::dyn_cast<fir::DoConcurrentLoopOp>(info.loopOp))
|
||||
doConcurrentOp.setLoopAnnotationAttr(la);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate FIR to begin a structured or unstructured increment loop nest.
|
||||
@ -2302,96 +2309,77 @@ private:
|
||||
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
|
||||
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
|
||||
mlir::Location loc = toLocation();
|
||||
mlir::Operation *boundsAndStepIP = nullptr;
|
||||
mlir::arith::IntegerOverflowFlags iofBackup{};
|
||||
|
||||
llvm::SmallVector<mlir::Value> nestLBs;
|
||||
llvm::SmallVector<mlir::Value> nestUBs;
|
||||
llvm::SmallVector<mlir::Value> nestSts;
|
||||
llvm::SmallVector<mlir::Value> nestReduceOperands;
|
||||
llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
|
||||
bool genDoConcurrent = false;
|
||||
|
||||
for (IncrementLoopInfo &info : incrementLoopNestInfo) {
|
||||
mlir::Value lowerValue;
|
||||
mlir::Value upperValue;
|
||||
mlir::Value stepValue;
|
||||
|
||||
{
|
||||
mlir::OpBuilder::InsertionGuard guard(*builder);
|
||||
|
||||
// Set the IP before the first loop in the nest so that all nest bounds
|
||||
// and step values are created outside the nest.
|
||||
if (boundsAndStepIP)
|
||||
builder->setInsertionPointAfter(boundsAndStepIP);
|
||||
genDoConcurrent = info.isStructured() && info.isConcurrent;
|
||||
|
||||
if (!genDoConcurrent)
|
||||
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
|
||||
info.isUnordered);
|
||||
if (!getLoweringOptions().getIntegerWrapAround()) {
|
||||
iofBackup = builder->getIntegerOverflowFlags();
|
||||
builder->setIntegerOverflowFlags(
|
||||
mlir::arith::IntegerOverflowFlags::nsw);
|
||||
}
|
||||
lowerValue = genControlValue(info.lowerExpr, info);
|
||||
upperValue = genControlValue(info.upperExpr, info);
|
||||
bool isConst = true;
|
||||
stepValue = genControlValue(info.stepExpr, info,
|
||||
info.isStructured() ? nullptr : &isConst);
|
||||
if (!getLoweringOptions().getIntegerWrapAround())
|
||||
builder->setIntegerOverflowFlags(iofBackup);
|
||||
boundsAndStepIP = stepValue.getDefiningOp();
|
||||
info.isConcurrent);
|
||||
|
||||
// Use a temp variable for unstructured loops with non-const step.
|
||||
if (!isConst) {
|
||||
info.stepVariable =
|
||||
builder->createTemporary(loc, stepValue.getType());
|
||||
boundsAndStepIP =
|
||||
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
|
||||
}
|
||||
if (!getLoweringOptions().getIntegerWrapAround()) {
|
||||
iofBackup = builder->getIntegerOverflowFlags();
|
||||
builder->setIntegerOverflowFlags(
|
||||
mlir::arith::IntegerOverflowFlags::nsw);
|
||||
}
|
||||
|
||||
nestLBs.push_back(genControlValue(info.lowerExpr, info));
|
||||
nestUBs.push_back(genControlValue(info.upperExpr, info));
|
||||
bool isConst = true;
|
||||
nestSts.push_back(genControlValue(
|
||||
info.stepExpr, info, info.isStructured() ? nullptr : &isConst));
|
||||
|
||||
if (!getLoweringOptions().getIntegerWrapAround())
|
||||
builder->setIntegerOverflowFlags(iofBackup);
|
||||
|
||||
// Use a temp variable for unstructured loops with non-const step.
|
||||
if (!isConst) {
|
||||
mlir::Value stepValue = nestSts.back();
|
||||
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
|
||||
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
|
||||
}
|
||||
|
||||
if (genDoConcurrent && nestReduceOperands.empty()) {
|
||||
// Create DO CONCURRENT reduce operands and attributes
|
||||
for (const auto &reduceSym : info.reduceSymList) {
|
||||
const fir::ReduceOperationEnum reduceOperation = reduceSym.first;
|
||||
const Fortran::semantics::Symbol *sym = reduceSym.second;
|
||||
fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
|
||||
nestReduceOperands.push_back(fir::getBase(exv));
|
||||
auto reduceAttr =
|
||||
fir::ReduceAttr::get(builder->getContext(), reduceOperation);
|
||||
nestReduceAttrs.push_back(reduceAttr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto [info, lowerValue, upperValue, stepValue] :
|
||||
llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) {
|
||||
// Structured loop - generate fir.do_loop.
|
||||
if (info.isStructured()) {
|
||||
if (genDoConcurrent)
|
||||
continue;
|
||||
|
||||
// The loop variable is a doLoop op argument.
|
||||
mlir::Type loopVarType = info.getLoopVariableType();
|
||||
mlir::Value loopValue;
|
||||
if (info.isUnordered) {
|
||||
llvm::SmallVector<mlir::Value> reduceOperands;
|
||||
llvm::SmallVector<mlir::Attribute> reduceAttrs;
|
||||
// Create DO CONCURRENT reduce operands and attributes
|
||||
for (const auto &reduceSym : info.reduceSymList) {
|
||||
const fir::ReduceOperationEnum reduce_operation = reduceSym.first;
|
||||
const Fortran::semantics::Symbol *sym = reduceSym.second;
|
||||
fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
|
||||
reduceOperands.push_back(fir::getBase(exv));
|
||||
auto reduce_attr =
|
||||
fir::ReduceAttr::get(builder->getContext(), reduce_operation);
|
||||
reduceAttrs.push_back(reduce_attr);
|
||||
}
|
||||
// The loop variable value is explicitly updated.
|
||||
info.doLoop = builder->create<fir::DoLoopOp>(
|
||||
loc, lowerValue, upperValue, stepValue, /*unordered=*/true,
|
||||
/*finalCountValue=*/false, /*iterArgs=*/std::nullopt,
|
||||
llvm::ArrayRef<mlir::Value>(reduceOperands), reduceAttrs);
|
||||
builder->setInsertionPointToStart(info.doLoop.getBody());
|
||||
loopValue = builder->createConvert(loc, loopVarType,
|
||||
info.doLoop.getInductionVar());
|
||||
} else {
|
||||
// The loop variable is a doLoop op argument.
|
||||
info.doLoop = builder->create<fir::DoLoopOp>(
|
||||
loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
|
||||
/*finalCountValue=*/true,
|
||||
builder->createConvert(loc, loopVarType, lowerValue));
|
||||
builder->setInsertionPointToStart(info.doLoop.getBody());
|
||||
loopValue = info.doLoop.getRegionIterArgs()[0];
|
||||
}
|
||||
auto loopOp = builder->create<fir::DoLoopOp>(
|
||||
loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
|
||||
/*finalCountValue=*/true,
|
||||
builder->createConvert(loc, loopVarType, lowerValue));
|
||||
info.loopOp = loopOp;
|
||||
builder->setInsertionPointToStart(loopOp.getBody());
|
||||
mlir::Value loopValue = loopOp.getRegionIterArgs()[0];
|
||||
|
||||
// Update the loop variable value in case it has non-index references.
|
||||
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
|
||||
if (info.maskExpr) {
|
||||
Fortran::lower::StatementContext stmtCtx;
|
||||
mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
|
||||
stmtCtx.finalizeAndReset();
|
||||
mlir::Value maskCondCast =
|
||||
builder->createConvert(loc, builder->getI1Type(), maskCond);
|
||||
auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
|
||||
/*withElseRegion=*/false);
|
||||
builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
|
||||
}
|
||||
if (info.hasLocalitySpecs())
|
||||
handleLocalitySpecs(info);
|
||||
|
||||
addLoopAnnotationAttr(info, dirs);
|
||||
continue;
|
||||
}
|
||||
@ -2455,6 +2443,60 @@ private:
|
||||
builder->restoreInsertionPoint(insertPt);
|
||||
}
|
||||
}
|
||||
|
||||
if (genDoConcurrent) {
|
||||
auto loopWrapperOp = builder->create<fir::DoConcurrentOp>(loc);
|
||||
builder->setInsertionPointToStart(
|
||||
builder->createBlock(&loopWrapperOp.getRegion()));
|
||||
|
||||
for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) {
|
||||
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
|
||||
info.isConcurrent);
|
||||
}
|
||||
|
||||
builder->setInsertionPointToEnd(loopWrapperOp.getBody());
|
||||
auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
|
||||
loc, nestLBs, nestUBs, nestSts, nestReduceOperands,
|
||||
nestReduceAttrs.empty()
|
||||
? nullptr
|
||||
: mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs),
|
||||
nullptr);
|
||||
|
||||
llvm::SmallVector<mlir::Type> loopBlockArgTypes(
|
||||
incrementLoopNestInfo.size(), builder->getIndexType());
|
||||
llvm::SmallVector<mlir::Location> loopBlockArgLocs(
|
||||
incrementLoopNestInfo.size(), loc);
|
||||
mlir::Region &loopRegion = loopOp.getRegion();
|
||||
mlir::Block *loopBlock = builder->createBlock(
|
||||
&loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs);
|
||||
builder->setInsertionPointToStart(loopBlock);
|
||||
|
||||
for (auto [info, blockArg] :
|
||||
llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) {
|
||||
info.loopOp = loopOp;
|
||||
mlir::Value loopValue =
|
||||
builder->createConvert(loc, info.getLoopVariableType(), blockArg);
|
||||
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
|
||||
|
||||
if (info.maskExpr) {
|
||||
Fortran::lower::StatementContext stmtCtx;
|
||||
mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
|
||||
stmtCtx.finalizeAndReset();
|
||||
mlir::Value maskCondCast =
|
||||
builder->createConvert(loc, builder->getI1Type(), maskCond);
|
||||
auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
|
||||
/*withElseRegion=*/false);
|
||||
builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
|
||||
}
|
||||
}
|
||||
|
||||
IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back();
|
||||
|
||||
if (innermostInfo.hasLocalitySpecs())
|
||||
handleLocalitySpecs(innermostInfo);
|
||||
|
||||
addLoopAnnotationAttr(innermostInfo, dirs);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate FIR to end a structured or unstructured increment loop nest.
|
||||
@ -2471,29 +2513,31 @@ private:
|
||||
it != rend; ++it) {
|
||||
IncrementLoopInfo &info = *it;
|
||||
if (info.isStructured()) {
|
||||
// End fir.do_loop.
|
||||
if (info.isUnordered) {
|
||||
builder->setInsertionPointAfter(info.doLoop);
|
||||
// End fir.do_concurent.loop.
|
||||
if (info.isConcurrent) {
|
||||
builder->setInsertionPointAfter(info.loopOp->getParentOp());
|
||||
continue;
|
||||
}
|
||||
|
||||
// End fir.do_loop.
|
||||
// Decrement tripVariable.
|
||||
builder->setInsertionPointToEnd(info.doLoop.getBody());
|
||||
auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
|
||||
builder->setInsertionPointToEnd(doLoopOp.getBody());
|
||||
llvm::SmallVector<mlir::Value, 2> results;
|
||||
results.push_back(builder->create<mlir::arith::AddIOp>(
|
||||
loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
|
||||
iofAttr));
|
||||
loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr));
|
||||
// Step loopVariable to help optimizations such as vectorization.
|
||||
// Induction variable elimination will clean up as necessary.
|
||||
mlir::Value step = builder->createConvert(
|
||||
loc, info.getLoopVariableType(), info.doLoop.getStep());
|
||||
loc, info.getLoopVariableType(), doLoopOp.getStep());
|
||||
mlir::Value loopVar =
|
||||
builder->create<fir::LoadOp>(loc, info.loopVariable);
|
||||
results.push_back(
|
||||
builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
|
||||
builder->create<fir::ResultOp>(loc, results);
|
||||
builder->setInsertionPointAfter(info.doLoop);
|
||||
builder->setInsertionPointAfter(doLoopOp);
|
||||
// The loop control variable may be used after the loop.
|
||||
builder->create<fir::StoreOp>(loc, info.doLoop.getResult(1),
|
||||
builder->create<fir::StoreOp>(loc, doLoopOp.getResult(1),
|
||||
info.loopVariable);
|
||||
continue;
|
||||
}
|
||||
|
@ -280,6 +280,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
|
||||
if (auto cufKernelOp = getRegion().getParentOfType<cuf::KernelOp>())
|
||||
return &cufKernelOp.getRegion().front();
|
||||
|
||||
if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
|
||||
return doConcurentOp.getBody();
|
||||
|
||||
return getEntryBlock();
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,9 @@ subroutine sub1(n)
|
||||
implicit none
|
||||
integer :: n, m, i, j, k
|
||||
integer, dimension(n) :: a
|
||||
!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"}
|
||||
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"}
|
||||
|
||||
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
|
||||
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
|
||||
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
|
||||
@ -29,10 +32,30 @@ subroutine sub1(n)
|
||||
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
|
||||
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index
|
||||
|
||||
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
|
||||
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
|
||||
!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
|
||||
!CHECK: fir.do_concurrent
|
||||
!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i"}
|
||||
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]]
|
||||
!CHECK: %[[J:.*]] = fir.alloca i32 {bindc_name = "j"}
|
||||
!CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[J]]
|
||||
!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k"}
|
||||
!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]]
|
||||
|
||||
!CHECK: fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) =
|
||||
!CHECK-SAME: (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to
|
||||
!CHECK-SAME: (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step
|
||||
!CHECK-SAME: (%{{.*}}, %{{.*}}, %{{.*}}) {
|
||||
!CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32
|
||||
!CHECK: fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref<i32>
|
||||
!CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32
|
||||
!CHECK: fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref<i32>
|
||||
!CHECK: %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32
|
||||
!CHECK: fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref<i32>
|
||||
|
||||
!CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
|
||||
!CHECK: %[[I_VAL:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
|
||||
!CHECK: %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64
|
||||
!CHECK: %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]])
|
||||
!CHECK: hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref<i32>
|
||||
do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
|
||||
a(i) = n
|
||||
end do
|
||||
@ -45,14 +68,17 @@ subroutine sub2(n)
|
||||
integer, dimension(n) :: a
|
||||
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
|
||||
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
|
||||
!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32>
|
||||
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
|
||||
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
|
||||
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
|
||||
!CHECK: fir.do_concurrent
|
||||
!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}})
|
||||
|
||||
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
|
||||
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
|
||||
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
|
||||
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
|
||||
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
|
||||
!CHECK: fir.do_concurrent
|
||||
!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}})
|
||||
do concurrent(i=1:n)
|
||||
do concurrent(j=1:bar(n*m, n/m))
|
||||
a(i) = n
|
||||
@ -60,7 +86,6 @@ subroutine sub2(n)
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
|
||||
!CHECK-LABEL: unstructured
|
||||
subroutine unstructured(inner_step)
|
||||
integer(4) :: i, j, inner_step
|
||||
|
@ -29,7 +29,7 @@ end subroutine
|
||||
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "p"}) {
|
||||
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
|
||||
! CHECK: %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
|
||||
! CHECK: fir.do_loop
|
||||
! CHECK: fir.do_concurrent.loop
|
||||
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"}
|
||||
! CHECK: %[[VAL_17:.*]] = fir.zero_bits !fir.ptr<!fir.array<?x!fir.char<1,?>>>
|
||||
! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index
|
||||
@ -43,7 +43,7 @@ end subroutine
|
||||
! CHECK: }
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_default_init(
|
||||
! CHECK: fir.do_loop
|
||||
! CHECK: fir.do_concurrent.loop
|
||||
! CHECK: %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"}
|
||||
! CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>
|
||||
! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<none>
|
||||
|
@ -2,15 +2,6 @@
|
||||
|
||||
! CHECK-LABEL: loop_test
|
||||
subroutine loop_test
|
||||
! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"}
|
||||
! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"}
|
||||
! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"}
|
||||
! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"}
|
||||
! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"}
|
||||
! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"}
|
||||
! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"}
|
||||
! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"}
|
||||
! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"}
|
||||
! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"}
|
||||
! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFloop_testEasum"}
|
||||
! CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFloop_testEi"}
|
||||
@ -25,7 +16,7 @@ subroutine loop_test
|
||||
j = 200
|
||||
k = 300
|
||||
|
||||
! CHECK-COUNT-3: fir.do_loop {{.*}} unordered
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}, %{{.*}}) = {{.*}}
|
||||
do concurrent (i=1:5, j=1:5, k=1:5) ! shared(a)
|
||||
! CHECK: fir.coordinate_of
|
||||
a(i,j,k) = 0
|
||||
@ -33,7 +24,7 @@ subroutine loop_test
|
||||
! CHECK: fir.call @_FortranAioBeginExternalListOutput
|
||||
print*, 'A:', i, j, k
|
||||
|
||||
! CHECK-COUNT-3: fir.do_loop {{.*}} unordered
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}, %{{.*}}) = {{.*}}
|
||||
! CHECK: fir.if
|
||||
do concurrent (integer(1)::i=1:5, j=1:5, k=1:5, i.ne.j .and. k.ne.3) shared(a)
|
||||
! CHECK-COUNT-2: fir.coordinate_of
|
||||
@ -53,7 +44,7 @@ subroutine loop_test
|
||||
! CHECK: fir.call @_FortranAioBeginExternalListOutput
|
||||
print*, 'B:', i, j, k, '-', asum
|
||||
|
||||
! CHECK: fir.do_loop {{.*}} unordered
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}) = {{.*}}
|
||||
! CHECK-COUNT-2: fir.if
|
||||
do concurrent (integer(2)::i=1:5, i.ne.3)
|
||||
if (i.eq.2 .or. i.eq.4) goto 5 ! fir.if
|
||||
@ -62,7 +53,7 @@ subroutine loop_test
|
||||
5 continue
|
||||
enddo
|
||||
|
||||
! CHECK: fir.do_loop {{.*}} unordered
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}) = {{.*}}
|
||||
! CHECK-COUNT-2: fir.if
|
||||
do concurrent (integer(2)::i=1:5, i.ne.3)
|
||||
if (i.eq.2 .or. i.eq.4) then ! fir.if
|
||||
@ -93,10 +84,6 @@ end subroutine loop_test
|
||||
|
||||
! CHECK-LABEL: c.func @_QPlis
|
||||
subroutine lis(n)
|
||||
! CHECK-DAG: fir.alloca i32 {bindc_name = "m"}
|
||||
! CHECK-DAG: fir.alloca i32 {bindc_name = "j"}
|
||||
! CHECK-DAG: fir.alloca i32 {bindc_name = "i"}
|
||||
! CHECK-DAG: fir.alloca i8 {bindc_name = "i"}
|
||||
! CHECK-DAG: fir.alloca i32 {bindc_name = "j", uniq_name = "_QFlisEj"}
|
||||
! CHECK-DAG: fir.alloca i32 {bindc_name = "k", uniq_name = "_QFlisEk"}
|
||||
! CHECK-DAG: fir.alloca !fir.box<!fir.ptr<!fir.array<?x?x?xi32>>> {bindc_name = "p", uniq_name = "_QFlisEp"}
|
||||
@ -117,8 +104,8 @@ subroutine lis(n)
|
||||
! CHECK: }
|
||||
r = 0
|
||||
|
||||
! CHECK: fir.do_loop %arg1 = %{{.*}} to %{{.*}} step %{{.*}} unordered {
|
||||
! CHECK: fir.do_loop %arg2 = %{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%arg3 = %{{.*}}) -> (index, i32) {
|
||||
! CHECK: fir.do_concurrent {
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
|
||||
! CHECK: }
|
||||
! CHECK: }
|
||||
do concurrent (integer(kind=1)::i=n:1:-1)
|
||||
@ -128,16 +115,18 @@ subroutine lis(n)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! CHECK: fir.do_loop %arg1 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered {
|
||||
! CHECK: fir.do_loop %arg2 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered {
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) {
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %[[V_95:[0-9]+]] = fir.alloca !fir.array<?x?xi32>, %{{.*}}, %{{.*}} {bindc_name = "t", pinned, uniq_name = "_QFlisEt"}
|
||||
! CHECK: %[[V_96:[0-9]+]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x?x?xi32>>> {bindc_name = "p", pinned, uniq_name = "_QFlisEp"}
|
||||
! CHECK: fir.store %{{.*}} to %[[V_96]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xi32>>>>
|
||||
! CHECK: fir.do_loop %arg3 = %{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%arg4 = %{{.*}}) -> (index, i32) {
|
||||
! CHECK: fir.do_loop %arg5 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered {
|
||||
! CHECK: fir.load %[[V_96]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xi32>>>>
|
||||
! CHECK: fir.convert %[[V_95]] : (!fir.ref<!fir.array<?x?xi32>>) -> !fir.ref<!fir.array<?xi32>>
|
||||
! CHECK: fir.do_concurrent {
|
||||
! CHECK: fir.alloca i32 {bindc_name = "m"}
|
||||
! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
|
||||
! CHECK: fir.load %[[V_96]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?xi32>>>>
|
||||
! CHECK: fir.convert %[[V_95]] : (!fir.ref<!fir.array<?x?xi32>>) -> !fir.ref<!fir.array<?xi32>>
|
||||
! CHECK: }
|
||||
! CHECK: }
|
||||
! CHECK: }
|
||||
! CHECK: fir.convert %[[V_95]] : (!fir.ref<!fir.array<?x?xi32>>) -> !fir.ref<!fir.array<?xi32>>
|
||||
|
@ -12,9 +12,7 @@ subroutine loop_test
|
||||
|
||||
! CHECK: %[[VAL_0:.*]] = fir.alloca f32 {bindc_name = "m", uniq_name = "_QFloop_testEm"}
|
||||
! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFloop_testEsum) : !fir.ref<i32>
|
||||
! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr<add> -> %[[VAL_1:.*]] : !fir.ref<i32>, #fir.reduce_attr<max> -> %[[VAL_0:.*]] : !fir.ref<f32>) {
|
||||
! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr<add> -> %[[VAL_1:.*]] : !fir.ref<i32>, #fir.reduce_attr<max> -> %[[VAL_0:.*]] : !fir.ref<f32>) {
|
||||
! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr<add> -> %[[VAL_1:.*]] : !fir.ref<i32>, #fir.reduce_attr<max> -> %[[VAL_0:.*]] : !fir.ref<f32>) {
|
||||
! CHECK: fir.do_concurrent.loop ({{.*}}) = ({{.*}}) to ({{.*}}) step ({{.*}}) reduce(#fir.reduce_attr<add> -> %[[VAL_1:.*]] : !fir.ref<i32>, #fir.reduce_attr<max> -> %[[VAL_0:.*]] : !fir.ref<f32>) {
|
||||
do concurrent (i=1:5, j=1:5, k=1:5) local(tmp) reduce(+:sum) reduce(max:m)
|
||||
tmp = i + j + k
|
||||
sum = tmp + sum
|
||||
|
@ -139,7 +139,6 @@ end subroutine
|
||||
! CHECK-LABEL: func.func @_QPloop_params3(
|
||||
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
|
||||
! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
|
||||
! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
|
||||
! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
|
||||
! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
|
||||
@ -153,4 +152,6 @@ end subroutine
|
||||
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
|
||||
! CHECK: %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow<nsw> : i32
|
||||
! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index
|
||||
! CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered {
|
||||
! CHECK: fir.do_concurrent {
|
||||
! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
|
||||
! CHECK: fir.do_concurrent.loop (%[[VAL_34:.*]]) = (%[[VAL_28]]) to (%[[VAL_30]]) step (%[[VAL_33]]) {
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`.
|
||||
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! Tests that "loop-local values" are properly handled by localizing them to the
|
||||
! body of the loop nest. See `collectLoopLocalValues` and `localizeLoopLocalValue`
|
||||
! for a definition of "loop-local values" and how they are handled.
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! Tests loop-nest detection algorithm for do-concurrent mapping.
|
||||
|
||||
! REQUIRES: asserts
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! Tests mapping of a `do concurrent` loop with multiple iteration ranges.
|
||||
|
||||
! RUN: split-file %s %t
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
|
||||
! RUN: | FileCheck %s
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
! Fails until we update the pass to use the `fir.do_concurrent` op.
|
||||
! XFAIL: *
|
||||
|
||||
! Tests that if `do concurrent` is not perfectly nested in its parent loop, that
|
||||
! we skip converting the not-perfectly nested `do concurrent` loop.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user