From 27f777e9c06daeb03efad9230fe080df2a3a94c5 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 30 Jul 2025 09:20:42 +0200 Subject: [PATCH] [Flang][OpenMP] Skip DSA for canonical loops (#150593) OpenMP loop transformations to not have data-sharing attributes and do not explicitly privatize the loop variable. The DataSharingProcessor was still used in #144785 because `createAndSetPrivatizedLoopVar` expected it. We skip that function and directly write to the loop variable. If the loop variable is implicitly or explicitly privatized, it will be due to surrounding OpenMP constructs such as `parallel`. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 51 +++++----- .../test/Lower/OpenMP/unroll-heuristic01.f90 | 63 +++++++----- .../test/Lower/OpenMP/unroll-heuristic02.f90 | 98 +++++++++---------- .../test/Lower/OpenMP/unroll-heuristic03.f90 | 61 ++++++++++++ 4 files changed, 173 insertions(+), 100 deletions(-) create mode 100644 flang/test/Lower/OpenMP/unroll-heuristic03.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 12089d6caa5f..6a4ec77ca071 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -697,20 +697,16 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter, } } -static mlir::Operation * -createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const semantics::Symbol *sym) { +static mlir::Operation *setLoopVar(lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const semantics::Symbol *sym) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); - mlir::Type tempTy = converter.genType(*sym); - - assert(converter.isPresentShallowLookup(*sym) && - "Expected symbol to be in symbol table."); - firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); hlfir::Entity lhs{converter.getSymbolAddress(*sym)}; @@ -721,6 +717,15 @@ createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, return storeOp; } +static mlir::Operation * +createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const semantics::Symbol *sym) { + assert(converter.isPresentShallowLookup(*sym) && + "Expected symbol to be in symbol table."); + return setLoopVar(converter, loc, indexVal, sym); +} + // This helper function implements the functionality of "promoting" non-CPTR // arguments of use_device_ptr to use_device_addr arguments (automagic // conversion of use_device_ptr -> use_device_addr in these cases). The way we @@ -1123,6 +1128,11 @@ struct OpWithBodyGenInfo { return *this; } + OpWithBodyGenInfo &setPrivatize(bool value) { + privatize = value; + return *this; + } + /// [inout] converter to use for the clauses. lower::AbstractConverter &converter; /// [in] Symbol table @@ -1149,6 +1159,8 @@ struct OpWithBodyGenInfo { /// [in] if set to `true`, skip generating nested evaluations and dispatching /// any further leaf constructs. bool genSkeletonOnly = false; + /// [in] enables handling of privatized variable unless set to `false`. + bool privatize = true; }; /// Create the body (block) for an OpenMP Operation. @@ -1209,7 +1221,7 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // code will use the right symbols. bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == llvm::omp::Association::Loop; - bool privatize = info.clauses; + bool privatize = info.clauses && info.privatize; firOpBuilder.setInsertionPoint(marker); std::optional tempDsp; @@ -2083,7 +2095,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, const ConstructQueue &queue, ConstructQueue::const_iterator item, llvm::ArrayRef ivs, - llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); assert(ivs.size() == 1 && "Nested loops not yet implemented"); @@ -2176,10 +2188,8 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Value userVal = firOpBuilder.create(loc, loopLBVar, scaled); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - mlir::Operation *storeOp = - createAndSetPrivatizedLoopVar(converter, loc, userVal, iv); + // Write loop value to loop variable + mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv); firOpBuilder.setInsertionPointAfter(storeOp); return {iv}; @@ -2190,7 +2200,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, directive) .setClauses(&item->clauses) - .setDataSharingProcessor(&dsp) + .setPrivatize(false) .setGenRegionEntryCb(ivCallback), queue, item, tripcount, cli); @@ -2216,17 +2226,10 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, cp.processTODO( loc, llvm::omp::Directive::OMPD_unroll); - // Even though unroll does not support data-sharing clauses, but this is - // required to fill the symbol table. - DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, symTable); - dsp.processStep1(); - // Emit the associated loop auto canonLoop = genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item, - iv, llvm::omp::Directive::OMPD_unroll, dsp); + iv, llvm::omp::Directive::OMPD_unroll); // Apply unrolling to it auto cli = canonLoop.getCli(); diff --git a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 b/flang/test/Lower/OpenMP/unroll-heuristic01.f90 index a5f5c003b8a7..34020eb727e5 100644 --- a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 +++ b/flang/test/Lower/OpenMP/unroll-heuristic01.f90 @@ -13,27 +13,42 @@ subroutine omp_unroll_heuristic01(lb, ub, inc) end subroutine omp_unroll_heuristic01 -!CHECK-LABEL: func.func @_QPomp_unroll_heuristic01( -!CHECK: %c0_i32 = arith.constant 0 : i32 -!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32 -!CHECK-NEXT: %13 = arith.cmpi slt, %12, %c0_i32 : i32 -!CHECK-NEXT: %14 = arith.subi %c0_i32, %12 : i32 -!CHECK-NEXT: %15 = arith.select %13, %14, %12 : i32 -!CHECK-NEXT: %16 = arith.select %13, %11, %10 : i32 -!CHECK-NEXT: %17 = arith.select %13, %10, %11 : i32 -!CHECK-NEXT: %18 = arith.subi %17, %16 overflow : i32 -!CHECK-NEXT: %19 = arith.divui %18, %15 : i32 -!CHECK-NEXT: %20 = arith.addi %19, %c1_i32 overflow : i32 -!CHECK-NEXT: %21 = arith.cmpi slt, %17, %16 : i32 -!CHECK-NEXT: %22 = arith.select %21, %c0_i32, %20 : i32 -!CHECK-NEXT: %canonloop_s0 = omp.new_cli -!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%22) { -!CHECK-NEXT: %23 = arith.muli %iv, %12 : i32 -!CHECK-NEXT: %24 = arith.addi %10, %23 : i32 -!CHECK-NEXT: hlfir.assign %24 to %9#0 : i32, !fir.ref -!CHECK-NEXT: %25 = fir.load %9#0 : !fir.ref -!CHECK-NEXT: hlfir.assign %25 to %6#0 : i32, !fir.ref -!CHECK-NEXT: omp.terminator -!CHECK-NEXT: } -!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0) -!CHECK-NEXT: return +! CHECK-LABEL: func.func @_QPomp_unroll_heuristic01( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic01Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic01Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Einc"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Elb"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic01Eres"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic01Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Eub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_11:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.cmpi slt, %[[VAL_10]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_11]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_13]], %[[VAL_14]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_13]], %[[VAL_9]], %[[VAL_8]] : i32 +! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_13]], %[[VAL_8]], %[[VAL_9]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_16]] overflow : i32 +! CHECK: %[[VAL_19:.*]] = arith.divui %[[VAL_18]], %[[VAL_15]] : i32 +! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] overflow : i32 +! CHECK: %[[VAL_21:.*]] = arith.cmpi slt, %[[VAL_17]], %[[VAL_16]] : i32 +! CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_11]], %[[VAL_20]] : i32 +! CHECK: %[[VAL_23:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_23]]) %[[VAL_24:.*]] : i32 in range(%[[VAL_22]]) { +! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_8]], %[[VAL_25]] : i32 +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_2]]#0 : i32, !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +! CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_6]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.unroll_heuristic(%[[VAL_23]]) +! CHECK: return +! CHECK: } \ No newline at end of file diff --git a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 b/flang/test/Lower/OpenMP/unroll-heuristic02.f90 index 14f694d6cdb7..fdb1366960b2 100644 --- a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 +++ b/flang/test/Lower/OpenMP/unroll-heuristic02.f90 @@ -37,61 +37,55 @@ end subroutine omp_unroll_heuristic_nested02 !CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eouter_ub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) !CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic_nested02Eres"} !CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ei"} -!CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} -!CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref -!CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref -!CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref -!CHECK: %[[VAL_20:.*]] = arith.constant 0 : i32 -!CHECK: %[[VAL_21:.*]] = arith.constant 1 : i32 -!CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_20]] : i32 -!CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_20]], %[[VAL_19]] : i32 -!CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_22]], %[[VAL_23]], %[[VAL_19]] : i32 -!CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_22]], %[[VAL_18]], %[[VAL_17]] : i32 -!CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_22]], %[[VAL_17]], %[[VAL_18]] : i32 -!CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_26]], %[[VAL_25]] overflow : i32 -!CHECK: %[[VAL_28:.*]] = arith.divui %[[VAL_27]], %[[VAL_24]] : i32 -!CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_21]] overflow : i32 -!CHECK: %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_26]], %[[VAL_25]] : i32 -!CHECK: %[[VAL_31:.*]] = arith.select %[[VAL_30]], %[[VAL_20]], %[[VAL_29]] : i32 -!CHECK: %[[VAL_32:.*]] = omp.new_cli -!CHECK: omp.canonical_loop(%[[VAL_32]]) %[[VAL_33:.*]] : i32 in range(%[[VAL_31]]) { -!CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_19]] : i32 -!CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_17]], %[[VAL_34]] : i32 -!CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_14]]#0 : i32, !fir.ref -!CHECK: %[[VAL_36:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} -!CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_36]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref -!CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref -!CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref -!CHECK: %[[VAL_41:.*]] = arith.constant 0 : i32 -!CHECK: %[[VAL_42:.*]] = arith.constant 1 : i32 -!CHECK: %[[VAL_43:.*]] = arith.cmpi slt, %[[VAL_40]], %[[VAL_41]] : i32 -!CHECK: %[[VAL_44:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] : i32 -!CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_40]] : i32 -!CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_43]], %[[VAL_39]], %[[VAL_38]] : i32 -!CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_43]], %[[VAL_38]], %[[VAL_39]] : i32 -!CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_47]], %[[VAL_46]] overflow : i32 -!CHECK: %[[VAL_49:.*]] = arith.divui %[[VAL_48]], %[[VAL_45]] : i32 -!CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_42]] overflow : i32 -!CHECK: %[[VAL_51:.*]] = arith.cmpi slt, %[[VAL_47]], %[[VAL_46]] : i32 -!CHECK: %[[VAL_52:.*]] = arith.select %[[VAL_51]], %[[VAL_41]], %[[VAL_50]] : i32 -!CHECK: %[[VAL_53:.*]] = omp.new_cli -!CHECK: omp.canonical_loop(%[[VAL_53]]) %[[VAL_54:.*]] : i32 in range(%[[VAL_52]]) { -!CHECK: %[[VAL_55:.*]] = arith.muli %[[VAL_54]], %[[VAL_40]] : i32 -!CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_38]], %[[VAL_55]] : i32 -!CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_37]]#0 : i32, !fir.ref -!CHECK: %[[VAL_57:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref -!CHECK: %[[VAL_58:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref -!CHECK: %[[VAL_59:.*]] = arith.addi %[[VAL_57]], %[[VAL_58]] : i32 -!CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_12]]#0 : i32, !fir.ref +!CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref +!CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref +!CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +!CHECK: %[[VAL_16:.*]] = arith.constant 0 : i32 +!CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 +!CHECK: %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i32 +!CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_16]], %[[VAL_15]] : i32 +!CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_15]] : i32 +!CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_18]], %[[VAL_14]], %[[VAL_13]] : i32 +!CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_18]], %[[VAL_13]], %[[VAL_14]] : i32 +!CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] overflow : i32 +!CHECK: %[[VAL_24:.*]] = arith.divui %[[VAL_23]], %[[VAL_20]] : i32 +!CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_17]] overflow : i32 +!CHECK: %[[VAL_26:.*]] = arith.cmpi slt, %[[VAL_22]], %[[VAL_21]] : i32 +!CHECK: %[[VAL_27:.*]] = arith.select %[[VAL_26]], %[[VAL_16]], %[[VAL_25]] : i32 +!CHECK: %[[VAL_28:.*]] = omp.new_cli +!CHECK: omp.canonical_loop(%[[VAL_28]]) %[[VAL_29:.*]] : i32 in range(%[[VAL_27]]) { +!CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_15]] : i32 +!CHECK: %[[VAL_31:.*]] = arith.addi %[[VAL_13]], %[[VAL_30]] : i32 +!CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_2]]#0 : i32, !fir.ref +!CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +!CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +!CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +!CHECK: %[[VAL_35:.*]] = arith.constant 0 : i32 +!CHECK: %[[VAL_36:.*]] = arith.constant 1 : i32 +!CHECK: %[[VAL_37:.*]] = arith.cmpi slt, %[[VAL_34]], %[[VAL_35]] : i32 +!CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_35]], %[[VAL_34]] : i32 +!CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_37]], %[[VAL_38]], %[[VAL_34]] : i32 +!CHECK: %[[VAL_40:.*]] = arith.select %[[VAL_37]], %[[VAL_33]], %[[VAL_32]] : i32 +!CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_37]], %[[VAL_32]], %[[VAL_33]] : i32 +!CHECK: %[[VAL_42:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] overflow : i32 +!CHECK: %[[VAL_43:.*]] = arith.divui %[[VAL_42]], %[[VAL_39]] : i32 +!CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_36]] overflow : i32 +!CHECK: %[[VAL_45:.*]] = arith.cmpi slt, %[[VAL_41]], %[[VAL_40]] : i32 +!CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_45]], %[[VAL_35]], %[[VAL_44]] : i32 +!CHECK: %[[VAL_47:.*]] = omp.new_cli +!CHECK: omp.canonical_loop(%[[VAL_47]]) %[[VAL_48:.*]] : i32 in range(%[[VAL_46]]) { +!CHECK: %[[VAL_49:.*]] = arith.muli %[[VAL_48]], %[[VAL_34]] : i32 +!CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_32]], %[[VAL_49]] : i32 +!CHECK: hlfir.assign %[[VAL_50]] to %[[VAL_7]]#0 : i32, !fir.ref +!CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +!CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +!CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_51]], %[[VAL_52]] : i32 +!CHECK: hlfir.assign %[[VAL_53]] to %[[VAL_12]]#0 : i32, !fir.ref !CHECK: omp.terminator !CHECK: } -!CHECK: omp.unroll_heuristic(%[[VAL_53]]) +!CHECK: omp.unroll_heuristic(%[[VAL_47]]) !CHECK: omp.terminator !CHECK: } -!CHECK: omp.unroll_heuristic(%[[VAL_32]]) +!CHECK: omp.unroll_heuristic(%[[VAL_28]]) !CHECK: return !CHECK: } diff --git a/flang/test/Lower/OpenMP/unroll-heuristic03.f90 b/flang/test/Lower/OpenMP/unroll-heuristic03.f90 new file mode 100644 index 000000000000..308c149c260d --- /dev/null +++ b/flang/test/Lower/OpenMP/unroll-heuristic03.f90 @@ -0,0 +1,61 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + +! Test implicitly privatized loop variable that is affected by unrolling. + +subroutine omp_unroll_heuristic03(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp parallel + !$omp unroll + do i = lb, ub, inc + res = i + end do + !$omp end unroll + !$omp end parallel + +end subroutine omp_unroll_heuristic03 + + +! CHECK-LABEL: func.func @_QPomp_unroll_heuristic03( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic03Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Einc"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Elb"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic03Eres"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic03Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Eub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: omp.parallel private(@_QFomp_unroll_heuristic03Ei_private_i32 %[[VAL_2]]#0 -> %[[VAL_8:.*]] : !fir.ref) { +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_15:.*]] = arith.cmpi slt, %[[VAL_12]], %[[VAL_13]] : i32 +! CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_15]], %[[VAL_16]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_15]], %[[VAL_11]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_15]], %[[VAL_10]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] overflow : i32 +! CHECK: %[[VAL_21:.*]] = arith.divui %[[VAL_20]], %[[VAL_17]] : i32 +! CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_14]] overflow : i32 +! CHECK: %[[VAL_23:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_18]] : i32 +! CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_13]], %[[VAL_22]] : i32 +! CHECK: %[[VAL_25:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_25]]) %[[VAL_26:.*]] : i32 in range(%[[VAL_24]]) { +! CHECK: %[[VAL_27:.*]] = arith.muli %[[VAL_26]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_10]], %[[VAL_27]] : i32 +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_9]]#0 : i32, !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref +! CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_6]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.unroll_heuristic(%[[VAL_25]]) +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } \ No newline at end of file