[OpenMP 6.0] Codegen for reduction over private variables with reduction clause (#134709)

Codegen support for reduction over private variables with the reduction
clause, as described in Section 7.6.10 of the OpenMP 6.0 spec:
- An internal shared copy is initialized with an initializer value.
- The shared copy is updated by combining its value with the values from
the private copies created by the clause.
- Once an encountering thread verifies that all updates are complete,
its original list item is updated by merging its value with that of the
shared copy, and the result is then broadcast to all threads.

Sample test case from the OpenMP 6.0 examples:
```
#include <assert.h>
#include <omp.h>
#define N 10

void do_red(int n, int *v, int &sum_v)
{
    sum_v = 0; // sum_v is private
    #pragma omp for reduction(original(private),+: sum_v)
    for (int i = 0; i < n; i++) 
    {
        sum_v += v[i];
    }
}

int main(void)
{
    int v[N];
    for (int i = 0; i < N; i++)
        v[i] = i;
    #pragma omp parallel num_threads(4)
    {
        int s_v; // s_v is private
        do_red(N, v, s_v);
        assert(s_v == 45);
    }
    return 0;
}
```
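Each thread's private `s_v` ends up holding 45 (the sum 0 + 1 + ... + 9), since the combined result is broadcast back to every thread before the assert runs.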
Expected Codegen:
```
 // A shared global/static variable is introduced for the reduction result.
 // This variable is initialized (e.g., using memset or a UDR initializer)
 // e.g., .omp.reduction.internal_private_var

 // Initialization block (executed by thread 0)
 // e.g., call void @llvm.memset.p0.i64(...) or call @udr_initializer(...)

 // Barrier so no thread starts combining before initialization completes
  call void @__kmpc_barrier(...)

  call void @__kmpc_critical(...)
    // Inside critical section:
    // Load the current value from the shared variable
    // Load the thread-local private variable's value
    // Perform the reduction operation 
    // Store the result back to the shared variable

  call void @__kmpc_end_critical(...)
  // Barrier after all threads complete their combinations

  call void @__kmpc_barrier(...)
 // Broadcast phase:
 // Load the final result from the shared variable
 // Store the final result to the original private variable in each thread
 // Final barrier after broadcast

  call void @__kmpc_barrier(...)
```
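For user-defined reductions the same sequence is emitted, except that the UDR's initializer and combiner (the `.omp_initializer.` and `.omp_combiner.` helpers visible in the autogenerated test below) are called in place of the memset initialization and the built-in combine operation.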

---------

Co-authored-by: Chandra Ghale <ghale@pe31.hpc.amslabs.hpecorp.net>
Committed by CHANDRA GHALE on 2025-06-11 14:01:31 +05:30 (commit afbcf9529a, parent 937be17752).
12 changed files with 1235 additions and 38 deletions.


@@ -406,7 +406,8 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Extensions to atomic construct | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| Private reductions | :part:`partial` | :none:`unclaimed` | Parse/Sema:https://github.com/llvm/llvm-project/pull/129938 |
+| Private reductions | :good:`mostly` | :none:`unclaimed` | Parse/Sema:https://github.com/llvm/llvm-project/pull/129938 |
+| | | | Codegen: https://github.com/llvm/llvm-project/pull/134709 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Self maps | :part:`partial` | :none:`unclaimed` | parsing/sema done: https://github.com/llvm/llvm-project/pull/129888 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+


@@ -1100,6 +1100,7 @@ OpenMP Support
  open parenthesis. (#GH139665)
- An error is now emitted when OpenMP ``collapse`` and ``ordered`` clauses have
  an argument larger than what can fit within a 64-bit integer.
- Added support for private variable reduction.

Improvements
^^^^^^^^^^^^


@@ -4907,11 +4907,255 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
  }
}

static std::string generateUniqueName(CodeGenModule &CGM,
                                      llvm::StringRef Prefix, const Expr *Ref);

void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //   Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting.
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);

  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  std::string SharedName =
      CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      LLVMType, ".omp.reduction." + SharedName);

  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");

  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);

  CGF.EmitBlock(InitBB);

  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
            const auto *OutDRE = cast<DeclRefExpr>(
                cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(OutVD, SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    CE->getCallee()->IgnoreParenImpCasts())) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(InitializerFn));
              CGF.EmitIgnoredExpr(CE);
            } else {
              CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                   PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                 PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                               PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(SharedResult, PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(InitExpr, SharedResult,
                               PrivateType.getQualifiers(), true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(SharedResult, PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(InitEndBB);
  CGF.EmitBlock(InitEndBB);

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
  LValue LHSLV = CGF.EmitLValue(Privates);

  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName({"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(CurrentUDR);
      if (FnPair.first) {
        if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
          const auto *OutDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
                                SharedLV.getAddress());
          LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
                                LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
      LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Broadcast final result
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(TargetLHSLV,
                          CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
                          PrivateType, AggValueSlot::DoesNotOverlap, false);
  } else {
    CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOps, Privates,
                                cast<DeclRefExpr>(LHSExprs),
                                cast<DeclRefExpr>(RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
-                                   ArrayRef<const Expr *> Privates,
-                                   ArrayRef<const Expr *> LHSExprs,
-                                   ArrayRef<const Expr *> RHSExprs,
-                                   ArrayRef<const Expr *> ReductionOps,
+                                   ArrayRef<const Expr *> OrgPrivates,
+                                   ArrayRef<const Expr *> OrgLHSExprs,
+                                   ArrayRef<const Expr *> OrgRHSExprs,
+                                   ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;
@@ -4958,10 +5202,10 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
-   const auto *IPriv = Privates.begin();
-   const auto *ILHS = LHSExprs.begin();
-   const auto *IRHS = RHSExprs.begin();
-   for (const Expr *E : ReductionOps) {
+   const auto *IPriv = OrgPrivates.begin();
+   const auto *ILHS = OrgLHSExprs.begin();
+   const auto *IRHS = OrgRHSExprs.begin();
+   for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
@@ -4971,6 +5215,26 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
@@ -5162,7 +5426,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
    } else {
      // Emit as a critical region.
      auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                   const Expr *, const Expr *) {
        CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
        std::string Name = RT.getName({"atomic_reduction"});
        RT.emitCriticalRegion(
@@ -5209,6 +5473,16 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
                           OrgRHSExprs[I], OrgReductionOps[I]);
  }
}

/// Generates unique name for artificial threadprivate variables.


@@ -1201,8 +1201,20 @@ public:
  struct ReductionOptionsTy {
    bool WithNowait;
    bool SimpleReduction;
    llvm::SmallVector<bool, 8> IsPrivateVarReduction;
    OpenMPDirectiveKind ReductionKind;
  };

  /// Emits code for private variable reduction
  /// \param Privates List of private copies for original reduction arguments.
  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
  /// or 'operator binop(LHS, RHS)'.
  void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc,
                            const Expr *Privates, const Expr *LHSExprs,
                            const Expr *RHSExprs, const Expr *ReductionOps);

  /// Emit a code for reduction clause. Next code should be emitted for
  /// reduction:
  /// \code

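The IsPrivateVarReduction flags line up one-to-one with the reduction items collected from the clauses. A minimal call-site sketch, with hypothetical values (it mirrors the CGStmtOpenMP.cpp change below):

```
// Hypothetical example: two reduction items, only the first of which was
// declared original(private), so only it takes the private-reduction path.
CGOpenMPRuntime::ReductionOptionsTy Options{
    /*WithNowait=*/false, /*SimpleReduction=*/false,
    /*IsPrivateVarReduction=*/{true, false}, OMPD_for};
CGM.getOpenMPRuntime().emitReduction(CGF, D.getEndLoc(), Privates, LHSExprs,
                                     RHSExprs, ReductionOps, Options);
```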

@@ -1472,6 +1472,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  llvm::SmallVector<bool, 8> IsPrivateVarReduction;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
@@ -1482,6 +1483,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    IsPrivateVarReduction.append(C->private_var_reduction_flags().begin(),
                                 C->private_var_reduction_flags().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
@@ -1503,7 +1506,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
-       {WithNowait, SimpleReduction, ReductionKind});
+       {WithNowait, SimpleReduction, IsPrivateVarReduction, ReductionKind});
  }
}
@@ -3944,7 +3947,8 @@ static void emitScanBasedDirective(
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
-         {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
+         {/*WithNowait=*/true, /*SimpleReduction=*/true,
+          /*IsPrivateVarReduction*/ {}, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
@@ -5749,7 +5753,8 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  }
  CGM.getOpenMPRuntime().emitReduction(
      *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
-     {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
+     {/*WithNowait=*/true, /*SimpleReduction=*/true,
+      /*IsPrivateVarReduction*/ {}, OMPD_simd});
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    LValue DestLVal;

@@ -19047,34 +19047,14 @@ static bool actOnOMPReductionKindClause(
        reportOriginalDsa(S, Stack, D, DVar);
        continue;
      }
-     // OpenMP 6.0 [ 7.6.10 ]
-     // Support Reduction over private variables with reduction clause.
-     // A list item in a reduction clause can now be private in the enclosing
-     // context. For orphaned constructs it is assumed to be shared unless the
-     // original(private) modifier appears in the clause.
-     DVar = Stack->getImplicitDSA(D, true);
-     bool IsOrphaned = false;
-     OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective();
-     OpenMPDirectiveKind ParentDir = Stack->getParentDirective();
-     // Check if the construct is orphaned (has no enclosing OpenMP context)
-     IsOrphaned = ParentDir == OMPD_unknown;
-     // OpenMP 6.0: Private DSA check
-     IsPrivate =
-         (S.getLangOpts().OpenMP > 52) &&
-         ((isOpenMPPrivate(DVar.CKind) && DVar.CKind != OMPC_reduction &&
-           isOpenMPWorksharingDirective(CurrDir) &&
-           !isOpenMPParallelDirective(CurrDir) &&
-           !isOpenMPTeamsDirective(CurrDir) &&
-           !isOpenMPSimdDirective(ParentDir)) ||
-          (IsOrphaned && DVar.CKind == OMPC_unknown) ||
-          RD.OrigSharingModifier != OMPC_ORIGINAL_SHARING_shared);
      // OpenMP [2.14.3.6, Restrictions, p.1]
      // A list item that appears in a reduction clause of a worksharing
      // construct must be shared in the parallel regions to which any of the
      // worksharing regions arising from the worksharing construct bind.
-     if (!IsPrivate && isOpenMPWorksharingDirective(CurrDir) &&
+     if (S.getLangOpts().OpenMP <= 52 &&
+         isOpenMPWorksharingDirective(CurrDir) &&
          !isOpenMPParallelDirective(CurrDir) &&
          !isOpenMPTeamsDirective(CurrDir)) {
        DVar = Stack->getImplicitDSA(D, true);
@@ -19085,6 +19065,23 @@ static bool actOnOMPReductionKindClause(
          reportOriginalDsa(S, Stack, D, DVar);
          continue;
        }
+     } else if (isOpenMPWorksharingDirective(CurrDir) &&
+                !isOpenMPParallelDirective(CurrDir) &&
+                !isOpenMPTeamsDirective(CurrDir)) {
+       // OpenMP 6.0 [ 7.6.10 ]
+       // Support Reduction over private variables with reduction clause.
+       // A list item in a reduction clause can now be private in the enclosing
+       // context. For orphaned constructs it is assumed to be shared unless
+       // the original(private) modifier appears in the clause.
+       DVar = Stack->getImplicitDSA(D, true);
+       // Determine if the variable should be considered private
+       IsPrivate = DVar.CKind != OMPC_shared;
+       bool IsOrphaned = false;
+       OpenMPDirectiveKind ParentDir = Stack->getParentDirective();
+       IsOrphaned = ParentDir == OMPD_unknown;
+       if ((IsOrphaned &&
+            RD.OrigSharingModifier == OMPC_ORIGINAL_SHARING_private))
+         IsPrivate = true;
      }
    } else {
      // Threadprivates cannot be shared between threads, so dignose if the base

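To make the OpenMP 6.0 rule above concrete, here is a minimal sketch (a variant of this patch's do_red test case): in an orphaned worksharing construct, the list item is assumed shared unless the original(private) modifier is present.

```
// Orphaned construct: no enclosing OpenMP region is visible at this point,
// so without original(private) the original list item would be assumed
// shared; the modifier marks it as private in the enclosing context.
void orphaned_sum(int n, int *v, int &sum_v) {
#pragma omp for reduction(original(private), + : sum_v)
  for (int i = 0; i < n; i++)
    sum_v += v[i];
}
```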

@@ -508,6 +508,7 @@ void test_collapse(void) {
#pragma omp distribute simd collapse(5 - 5)
  for (i = 0; i < 16; ++i)
    ;
#if defined(_OPENMP) && (_OPENMP <= 202111)
// expected-note@+3 2 {{defined as reduction}}
#pragma omp target
#pragma omp teams
@@ -520,7 +521,7 @@ void test_collapse(void) {
#pragma omp for reduction(+ : i, j)
  for (int k = 0; k < 16; ++k)
    i += j;
#endif
#pragma omp target
#pragma omp teams
  for (i = 0; i < 16; ++i)


@@ -0,0 +1,710 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex ".omp.reduction..internal[a-zA-Z_0-9.]+"
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -x c++ -std=c++17 -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
#define N 10

class Sum {
  int val;

public:
  Sum(int v = 0) : val(v) {}
  Sum operator+(const Sum &rhs) const { return Sum(val + rhs.val); }
  Sum &operator+=(const Sum &rhs) {
    val += rhs.val;
    return *this;
  }
};

#pragma omp declare reduction(sum_reduction:Sum : omp_out += omp_in) \
    initializer(omp_priv = Sum(0))

void func_red() {
  Sum result(0);
  Sum array[N];

  for (int i = 0; i < N; i++) {
    array[i] = Sum(i);
  }

#pragma omp parallel private(result) num_threads(4)
  {
#pragma omp for reduction(sum_reduction : result)
    for (int i = 0; i < N; i++) {
      result = result + array[i];
    }
  }
}

void do_red(int n, int *v, int &sum_v) {
  sum_v = 0;
#pragma omp for reduction(original(private), + : sum_v)
  for (int i = 0; i < n; i++) {
    sum_v += v[i];
  }
}

void do_red_extended(int n, int *v, int &sum_v, int &prod_v) {
  sum_v = 0;
  prod_v = 1;
#pragma omp for reduction(original(private), + : sum_v) \
    reduction(original(private), * : prod_v)
  for (int i = 0; i < n; i++) {
    sum_v += v[i];
    prod_v *= v[i];
  }
}

int main(void) {
  int v[N];
  for (int i = 0; i < N; i++)
    v[i] = i;

#pragma omp parallel num_threads(4)
  {
    int s_v;
    do_red(N, v, s_v);
  }

  int sum_v_ext = 0, prod_v_ext = 1;
#pragma omp parallel num_threads(4)
  {
    do_red_extended(N, v, sum_v_ext, prod_v_ext);
  }
  return 0;
}
//.
// CHECK: @.omp.reduction..internal_pivate_.result.result_996 = common global %class.Sum zeroinitializer, align 4
// CHECK: @.omp.reduction..internal_pivate_.sum_v.sum_v_1188 = common global i32 0, align 4
// CHECK: @.omp.reduction..internal_pivate_.sum_v.sum_v_1392 = common global i32 0, align 4
// CHECK: @.omp.reduction..internal_pivate_.prod_v.prod_v_1461 = common global i32 0, align 4
//.
// CHECK-LABEL: define {{[^@]+}}@_Z8func_redv
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RESULT:%.*]] = alloca [[CLASS_SUM:%.*]], align 4
// CHECK-NEXT: [[ARRAY:%.*]] = alloca [10 x %class.Sum], align 16
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_SUM]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[RESULT]], i32 noundef 0)
// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x %class.Sum], ptr [[ARRAY]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_SUM]], ptr [[ARRAY_BEGIN]], i64 10
// CHECK-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
// CHECK: arrayctor.loop:
// CHECK-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]], i32 noundef 0)
// CHECK-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_SUM]], ptr [[ARRAYCTOR_CUR]], i64 1
// CHECK-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
// CHECK-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
// CHECK: arrayctor.cont:
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-NEXT: br label [[FOR_COND:%.*]]
// CHECK: for.cond:
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK: for.body:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x %class.Sum], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[REF_TMP]], i64 4, i1 false)
// CHECK-NEXT: br label [[FOR_INC:%.*]]
// CHECK: for.inc:
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK: for.end:
// CHECK-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP0]], i32 4)
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z8func_redv.omp_outlined, ptr [[ARRAY]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_ZN3SumC1Ei
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: store i32 [[V]], ptr [[V_ADDR]], align 4
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
// CHECK-NEXT: call void @_ZN3SumC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z8func_redv.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[ARRAY:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[ARRAY_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[RESULT:%.*]] = alloca [[CLASS_SUM:%.*]], align 4
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[RESULT1:%.*]] = alloca [[CLASS_SUM]], align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_SUM]], align 4
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [0 x ptr], align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[ARRAY]], ptr [[ARRAY_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAY_ADDR]], align 8
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[RESULT]], i32 noundef 0)
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: call void @.omp_initializer.(ptr noundef [[RESULT1]], ptr noundef [[RESULT]])
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x %class.Sum], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 @_ZNK3SumplERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[RESULT1]], ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]])
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr [[REF_TMP]], i32 0, i32 0
// CHECK-NEXT: store i32 [[CALL]], ptr [[COERCE_DIVE]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RESULT1]], ptr align 4 [[REF_TMP]], i64 4, i1 false)
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 0, i64 0, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z8func_redv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK-NEXT: ]
// CHECK: .omp.reduction.case1:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.case2:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.default:
// CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP12]], label [[INIT:%.*]], label [[INIT_END:%.*]]
// CHECK: init:
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @.omp.reduction..internal_pivate_.result.result_996, i32 noundef 0)
// CHECK-NEXT: br label [[INIT_END]]
// CHECK: init.end:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @.omp_combiner.(ptr noundef @.omp.reduction..internal_pivate_.result.result_996, ptr noundef [[RESULT1]])
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK-NEXT: [[TMP13:%.*]] = load [[CLASS_SUM]], ptr @.omp.reduction..internal_pivate_.result.result_996, align 4
// CHECK-NEXT: store [[CLASS_SUM]] [[TMP13]], ptr [[RESULT1]], align 4
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @.omp_combiner.(ptr noundef [[RESULT]], ptr noundef [[RESULT1]])
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_combiner.
// CHECK-SAME: (ptr noalias noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN3SumpLERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_ZN3SumpLERKS_
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RHS:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[RHS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: store ptr [[RHS]], ptr [[RHS_ADDR]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8
// CHECK-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[CLASS_SUM:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4
// CHECK-NEXT: [[VAL2:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[VAL2]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[VAL2]], align 4
// CHECK-NEXT: ret ptr [[THIS1]]
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_initializer.
// CHECK-SAME: (ptr noalias noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], i32 noundef 0)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_ZNK3SumplERKS_
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RHS:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[CLASS_SUM:%.*]], align 4
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[RHS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: store ptr [[RHS]], ptr [[RHS_ADDR]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8
// CHECK-NEXT: [[VAL2:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr [[TMP1]], i32 0, i32 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[VAL2]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[RETVAL]], i32 noundef [[ADD]])
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr [[RETVAL]], i32 0, i32 0
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
// CHECK-NEXT: ret i32 [[TMP3]]
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z8func_redv.omp_outlined.omp.reduction.reduction_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_ZN3SumC2Ei
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: store i32 [[V]], ptr [[V_ADDR]], align 4
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[CLASS_SUM:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[VAL]], align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z6do_rediPiRi
// CHECK-SAME: (i32 noundef [[N:%.*]], ptr noundef [[V:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM_V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[SUM_V_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[SUM_V4:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[I6:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [0 x ptr], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
// CHECK-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK-NEXT: store ptr [[SUM_V]], ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK: omp.precond.then:
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK-NEXT: store i32 0, ptr [[SUM_V4]], align 4
// CHECK-NEXT: store ptr [[SUM_V4]], ptr [[_TMP5]], align 8
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[V_ADDR]], align 8
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8
// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP18]]
// CHECK-NEXT: store i32 [[ADD9]], ptr [[TMP19]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1
// CHECK-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP0]], i32 0, i64 0, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z6do_rediPiRi.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK-NEXT: ]
// CHECK: .omp.reduction.case1:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.case2:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.default:
// CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: br i1 [[TMP23]], label [[INIT:%.*]], label [[INIT_END:%.*]]
// CHECK: init:
// CHECK-NEXT: store i32 0, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1188, align 4
// CHECK-NEXT: br label [[INIT_END]]
// CHECK: init.end:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1188, align 4
// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[SUM_V4]], align 4
// CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
// CHECK-NEXT: store i32 [[ADD11]], ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1188, align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1188, align 4
// CHECK-NEXT: store i32 [[TMP26]], ptr [[SUM_V4]], align 4
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[SUM_V4]], align 4
// CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
// CHECK-NEXT: store i32 [[ADD12]], ptr [[TMP7]], align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: br label [[OMP_PRECOND_END]]
// CHECK: omp.precond.end:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP0]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z6do_rediPiRi.omp.reduction.reduction_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR5]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z15do_red_extendediPiRiS0_
// CHECK-SAME: (i32 noundef [[N:%.*]], ptr noundef [[V:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM_V:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[PROD_V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[SUM_V_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[PROD_V_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[SUM_V5:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[PROD_V7:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[I9:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [0 x ptr], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
// CHECK-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK-NEXT: store ptr [[SUM_V]], ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: store ptr [[PROD_V]], ptr [[PROD_V_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK: omp.precond.then:
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK-NEXT: store i32 0, ptr [[SUM_V5]], align 4
// CHECK-NEXT: store ptr [[SUM_V5]], ptr [[_TMP6]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
// CHECK-NEXT: store i32 1, ptr [[PROD_V7]], align 4
// CHECK-NEXT: store ptr [[PROD_V7]], ptr [[_TMP8]], align 8
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
// CHECK-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I9]], align 4
// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[V_ADDR]], align 8
// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP6]], align 8
// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
// CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP21]]
// CHECK-NEXT: store i32 [[ADD12]], ptr [[TMP22]], align 4
// CHECK-NEXT: [[TMP24:%.*]] = load ptr, ptr [[V_ADDR]], align 8
// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[I9]], align 4
// CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP25]] to i64
// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM13]]
// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
// CHECK-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP8]], align 8
// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP28]], [[TMP26]]
// CHECK-NEXT: store i32 [[MUL15]], ptr [[TMP27]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP29]], 1
// CHECK-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP0]], i32 0, i64 0, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z15do_red_extendediPiRiS0_.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK-NEXT: ]
// CHECK: .omp.reduction.case1:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.case2:
// CHECK-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction.var)
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK: .omp.reduction.default:
// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: br i1 [[TMP31]], label [[INIT:%.*]], label [[INIT_END:%.*]]
// CHECK: init:
// CHECK-NEXT: store i32 0, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1392, align 4
// CHECK-NEXT: br label [[INIT_END]]
// CHECK: init.end:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1392, align 4
// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[SUM_V5]], align 4
// CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
// CHECK-NEXT: store i32 [[ADD17]], ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1392, align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.sum_v_1392, align 4
// CHECK-NEXT: store i32 [[TMP34]], ptr [[SUM_V5]], align 4
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[SUM_V5]], align 4
// CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
// CHECK-NEXT: store i32 [[ADD18]], ptr [[TMP9]], align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: br i1 [[TMP37]], label [[INIT19:%.*]], label [[INIT_END20:%.*]]
// CHECK: init19:
// CHECK-NEXT: store i32 1, ptr @.omp.reduction..internal_pivate_.prod_v.prod_v_1461, align 4
// CHECK-NEXT: br label [[INIT_END20]]
// CHECK: init.end20:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.prod_v.prod_v_1461, align 4
// CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[PROD_V7]], align 4
// CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP38]], [[TMP39]]
// CHECK-NEXT: store i32 [[MUL21]], ptr @.omp.reduction..internal_pivate_.prod_v.prod_v_1461, align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.prod_v.prod_v_1461, align 4
// CHECK-NEXT: store i32 [[TMP40]], ptr [[PROD_V7]], align 4
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP10]], align 4
// CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[PROD_V7]], align 4
// CHECK-NEXT: [[MUL22:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
// CHECK-NEXT: store i32 [[MUL22]], ptr [[TMP10]], align 4
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
// CHECK-NEXT: br label [[OMP_PRECOND_END]]
// CHECK: omp.precond.end:
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP0]])
// CHECK-NEXT: ret void
//
//
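A plausible source form for the `_Z15do_red_extendediPiRiS0_` function checked above (reconstructed from the mangled signature and the IR value names; the parameter names are illustrative):

```
void do_red_extended(int n, int *v, int &sum_v, int &prod_v) {
  sum_v = 0;  // the store of 0 through SUM_V_ADDR
  prod_v = 1; // the store of 1 through PROD_V_ADDR
  #pragma omp for reduction(original(private), + : sum_v) \
                  reduction(original(private), * : prod_v)
  for (int i = 0; i < n; i++) {
    sum_v += v[i];
    prod_v *= v[i];
  }
}
```

Note that each clause gets its own init/barrier/critical/broadcast sequence in the IR, keyed to the internal globals `.omp.reduction..internal_pivate_.sum_v.sum_v_1392` and `.omp.reduction..internal_pivate_.prod_v.prod_v_1461`.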
// CHECK-LABEL: define {{[^@]+}}@_Z15do_red_extendediPiRiS0_.omp.reduction.reduction_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR5]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@main
// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[V:%.*]] = alloca [10 x i32], align 16
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[SUM_V_EXT:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[PROD_V_EXT:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-NEXT: br label [[FOR_COND:%.*]]
// CHECK: for.cond:
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK: for.body:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[V]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: br label [[FOR_INC:%.*]]
// CHECK: for.inc:
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK: for.end:
// CHECK-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP0]], i32 4)
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined, ptr [[V]])
// CHECK-NEXT: store i32 0, ptr [[SUM_V_EXT]], align 4
// CHECK-NEXT: store i32 1, ptr [[PROD_V_EXT]], align 4
// CHECK-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP0]], i32 4)
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @main.omp_outlined.1, ptr [[V]], ptr [[SUM_V_EXT]], ptr [[PROD_V_EXT]])
// CHECK-NEXT: ret i32 0
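A sketch of the `main` implied by these CHECK lines: two `num_threads(4)` parallel regions are forked, the second capturing `v`, `SUM_V_EXT`, and `PROD_V_EXT` for `@main.omp_outlined.1`. The region bodies are not visible in the IR above, so they are assumed here:

```
int main(void) {
  int v[10];
  for (int i = 0; i < 10; i++)
    v[i] = i;
  #pragma omp parallel num_threads(4) // outlined as @main.omp_outlined
  { /* original do_red path, assumed */ }
  int sum_v_ext = 0;
  int prod_v_ext = 1;
  #pragma omp parallel num_threads(4) // outlined as @main.omp_outlined.1
  { /* presumably calls do_red_extended(10, v, sum_v_ext, prod_v_ext) */ }
  return 0;
}
```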


@@ -417,10 +417,12 @@ int main(int argc, char **argv) {
 #pragma omp for reduction(+ : qa[1], qa[0])
 for (int i = 0; i < 10; ++i)
 foo();
+#if defined(_OPENMP) && (_OPENMP <= 202111)
 #pragma omp parallel reduction(* : fl) // expected-note {{defined as reduction}}
 #pragma omp for reduction(+ : fl) // expected-error {{reduction variable must be shared}}
 for (int i = 0; i < 10; ++i)
 foo();
+#endif
 static int m=0;
 #pragma omp for reduction(+:m)
 for (int i = 0; i < 10; ++i)
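This guard (and the matching `#endif` moves in the next two hunks) restricts the `reduction variable must be shared` diagnostics to OpenMP 5.2 and earlier (`_OPENMP <= 202111`): from OpenMP 6.0 on (section 7.6.10), a worksharing reduction may apply to a variable that is private in the enclosing region. A minimal sketch of the 6.0 spelling with the `original(private)` modifier (names illustrative):

```
#pragma omp parallel num_threads(4)
{
  float fl = 0.0f; // fl is private to each thread
  #pragma omp for reduction(original(private), + : fl)
  for (int i = 0; i < 10; ++i)
    fl += 1.0f;
}
```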


@@ -396,11 +396,11 @@ int main(int argc, char **argv) {
 #pragma omp for simd reduction(+ : fl) // expected-error {{reduction variable must be shared}}
 for (int i = 0; i < 10; ++i)
 foo();
-#endif
 #pragma omp parallel reduction(* : fl) // expected-note {{defined as reduction}}
 #pragma omp for simd reduction(+ : fl) // expected-error {{reduction variable must be shared}}
 for (int i = 0; i < 10; ++i)
 foo();
+#endif
 static int m;
 #pragma omp for simd reduction(+ : m)
 for (int i = 0; i < 10; ++i)


@@ -461,12 +461,12 @@ int main(int argc, char **argv) {
 {
 foo();
 }
-#endif
 #pragma omp parallel reduction(* : fl) // expected-note {{defined as reduction}}
 #pragma omp sections reduction(+ : fl) // expected-error {{reduction variable must be shared}}
 {
 foo();
 }
+#endif
 static int m;
 #pragma omp sections reduction(+ : m) // OK
 {


@@ -0,0 +1,194 @@
// RUN: %libomp-cxx-compile -fopenmp-version=60 && %libomp-run
#include <stdio.h>
#include <omp.h>
#include <limits.h>
#include <complex.h>
#include <math.h>
#include "omp_testsuite.h"
#define N 10
class Sum {
  int val;

public:
  Sum(int v = 0) : val(v) {}
  Sum operator+(const Sum &rhs) const { return Sum(val + rhs.val); }
  Sum &operator+=(const Sum &rhs) {
    val += rhs.val;
    return *this;
  }
  int getValue() const { return val; }
};
// Declare OpenMP reduction
#pragma omp declare reduction(sum_reduction : Sum : omp_out += omp_in) \
    initializer(omp_priv = Sum(0))
#pragma omp declare reduction(sum_pctor_reduction : Sum : omp_out += omp_in) \
    initializer(omp_priv = Sum(1)) // non-default ctor
int checkUserDefinedReduction() {
  Sum final_result_udr(0);
  Sum final_result_udr_pctor(1);
  Sum array_sum[N];
  int error_flag = 0;
  int expected_value = 0;
  int expected_value_pctor = 0;
  for (int i = 0; i < N; ++i) {
    array_sum[i] = Sum(i);
    expected_value += i; // Calculate expected sum: 0 + 1 + ... + (N-1)
    expected_value_pctor += i;
  }
  // The pctor expectation also accounts for the initializer Sum(1): it is
  // applied once per thread-private copy (num_threads(4)) and once for the
  // internal shared copy used by this lowering.
  int num_threads_for_pctor_calc = 4; // num_threads(4)
  int priv_initializer_val_pctor = 1; // initializer(omp_priv = Sum(1))
  expected_value_pctor +=
      num_threads_for_pctor_calc + priv_initializer_val_pctor;
#pragma omp parallel num_threads(4) private(final_result_udr) private( \
    final_result_udr_pctor)
  {
#pragma omp for reduction(sum_reduction : final_result_udr) \
    reduction(sum_pctor_reduction : final_result_udr_pctor)
    for (int i = 0; i < N; ++i) {
      final_result_udr += array_sum[i];
      final_result_udr_pctor += array_sum[i];
    }
    if (final_result_udr.getValue() != expected_value ||
        final_result_udr_pctor.getValue() != expected_value_pctor)
      error_flag += 1;
  }
  return error_flag;
}
void performMinMaxRed(int &min_val, int &max_val) {
  int input_data[] = {7, 3, 12, 5, 8};
  int n_size = sizeof(input_data) / sizeof(input_data[0]);
  // Seed the (private) originals with the operators' identity values so
  // merging them into the reduction result cannot perturb it.
  min_val = INT_MAX;
  max_val = INT_MIN;
#pragma omp for reduction(original(private), min : min_val) \
    reduction(original(private), max : max_val)
  for (int i = 0; i < n_size; ++i) {
    if (input_data[i] < min_val)
      min_val = input_data[i];
    if (input_data[i] > max_val)
      max_val = input_data[i];
  }
}
int performComplexReduction() {
  double _Complex arr[N];
  double _Complex expected = 0.0 + 0.0 * I;
  double _Complex result = 0.0 + 0.0 * I;
  int error = 0;
  // Initialize the array and compute serial sum
  for (int i = 0; i < N; ++i) {
    arr[i] = i - i * I;
    expected += arr[i];
  }
  double real_sum = 0.0, imag_sum = 0.0;
#pragma omp parallel private(real_sum) private(imag_sum)
  {
#pragma omp for reduction(+ : real_sum, imag_sum)
    for (int i = 0; i < N; ++i) {
      real_sum += creal(arr[i]);
      imag_sum += cimag(arr[i]);
    }
    result = real_sum + imag_sum * I;
    if (cabs(result - expected) > 1e-6) {
      error++;
    }
  }
  return error;
}
std::complex<double> doComplexReduction(std::complex<double> *arr) {
  std::complex<double> result(1, 0);
  // std::complex is a class type, so no predefined '*' reduction applies;
  // the UDR supplies the combiner and the identity value (1, 0).
#pragma omp declare reduction(* : std::complex<double> : omp_out *= omp_in) \
    initializer(omp_priv = std::complex<double>(1, 0))
#pragma omp for reduction(original(private), * : result)
  for (int i = 0; i < N; ++i)
    result *= arr[i];
  return result;
}
void performReductions(int n_elements, const int *input_values,
                       int &sum_val_out, int &prod_val_out,
                       float &float_sum_val_out) {
  // Private variables for this thread's reductions.
  sum_val_out = 0;
  prod_val_out = 1;
  float_sum_val_out = 0.0f;
  const float kPiValue = 3.14f;
#pragma omp for reduction(original(private), + : sum_val_out) \
    reduction(original(private), * : prod_val_out) \
    reduction(original(private), + : float_sum_val_out)
  for (int i = 0; i < n_elements; ++i) {
    sum_val_out += input_values[i];
    prod_val_out *= (i + 1);
    float_sum_val_out += kPiValue;
  }
}
int main(void) {
  int input_array[N];
  int total_errors = 0;
  const float kPiVal = 3.14f;
  const int kExpectedSum = 45;            // Sum of 0..9
  const int kExpectedProd = 3628800;      // 10!
  const float kExpectedFsum = kPiVal * N; // 3.14f * 10
  const int kExpectedMin = 3;
  const int kExpectedMax = 12;
  std::complex<double> arr[N];
  std::complex<double> kExpectedComplex(1, 0);
  // Initialize the array with nonzero entries so the running product never
  // collapses to zero.
  for (int i = 1; i <= N; ++i) {
    arr[i - 1] = std::complex<double>(1.0 + 0.1 * i, 0.5 * i);
    kExpectedComplex *= arr[i - 1];
  }
  for (int i = 0; i < N; i++)
    input_array[i] = i;
#pragma omp parallel num_threads(4)
  {
    int t_sum_v;
    int t_prod_v;
    float t_fsum_v;
    performReductions(N, input_array, t_sum_v, t_prod_v, t_fsum_v);
    if (t_sum_v != kExpectedSum)
      total_errors++;
    if (t_prod_v != kExpectedProd)
      total_errors++;
    if (t_fsum_v != kExpectedFsum)
      total_errors++;
  }
#pragma omp parallel num_threads(4)
  {
    int t_min_v;
    int t_max_v;
    performMinMaxRed(t_min_v, t_max_v);
    if (t_min_v != kExpectedMin)
      total_errors++;
    if (t_max_v != kExpectedMax)
      total_errors++;
  }
  total_errors += checkUserDefinedReduction();
  total_errors += performComplexReduction();
#pragma omp parallel num_threads(4)
  {
    std::complex<double> result(1, 0);
    result = doComplexReduction(arr);
    if (std::abs(result.real() - kExpectedComplex.real()) > 1e-6 ||
        std::abs(result.imag() - kExpectedComplex.imag()) > 1e-6) {
      total_errors++;
    }
  }
  if (total_errors != 0)
    fprintf(stderr, "ERROR: reduction on private variable %d\n", total_errors);
  return total_errors;
}