[OpenMP][CodeGen] Add codegen for combined 'loop' directives.

The loop directive is a descriptive construct which allows the compiler
flexibility in how it generates code for the directive's associated
loop(s). See OpenMP specification 5.2 [257:8-9].

Codegen added in this patch for the combined 'loop' directives are:

'target teams loop'     -> 'target teams distribute parallel for'
'teams loop'            -> 'teams distribute parallel for'
'target parallel loop'  -> 'target parallel for'
'parallel loop'         -> 'parallel for'

NOTE: The implementation of the 'loop' directive itself is unchanged.

Differential Revision: https://reviews.llvm.org/D145823
This commit is contained in:
Dave Pagan 2023-07-01 19:10:28 -05:00
parent 73c12a8ffc
commit eb61bde829
32 changed files with 42944 additions and 32 deletions

View File

@ -2374,6 +2374,10 @@ OMPTeamsGenericLoopDirective *OMPTeamsGenericLoopDirective::Create(
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
Dir->setNumIterations(Exprs.NumIterations);
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setDistInc(Exprs.DistInc);
Dir->setPrevEnsureUpperBound(Exprs.PrevEUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
@ -2383,6 +2387,15 @@ OMPTeamsGenericLoopDirective *OMPTeamsGenericLoopDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB);
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB);
Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB);
Dir->setCombinedInit(Exprs.DistCombinedFields.Init);
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
return Dir;
}
@ -2418,6 +2431,10 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
Dir->setNumIterations(Exprs.NumIterations);
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setDistInc(Exprs.DistInc);
Dir->setPrevEnsureUpperBound(Exprs.PrevEUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
@ -2427,6 +2444,15 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB);
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB);
Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB);
Dir->setCombinedInit(Exprs.DistCombinedFields.Init);
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
return Dir;
}

View File

@ -603,7 +603,9 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
DKind == OMPD_target_teams_distribute_parallel_for_simd ||
DKind == OMPD_parallel_loop || DKind == OMPD_teams_loop ||
DKind == OMPD_target_parallel_loop || DKind == OMPD_target_teams_loop;
}
bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
@ -632,7 +634,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_parallel_master_taskloop_simd ||
DKind == OMPD_parallel_masked_taskloop ||
DKind == OMPD_parallel_masked_taskloop_simd ||
DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop;
DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop ||
DKind == OMPD_teams_loop;
}
bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
@ -729,7 +732,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
Kind == OMPD_target_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_loop || Kind == OMPD_target_teams_loop;
}
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
@ -766,7 +770,6 @@ void clang::getOpenMPCaptureRegions(
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
case OMPD_target_teams_loop:
CaptureRegions.push_back(OMPD_task);
CaptureRegions.push_back(OMPD_target);
CaptureRegions.push_back(OMPD_teams);
@ -781,6 +784,7 @@ void clang::getOpenMPCaptureRegions(
CaptureRegions.push_back(OMPD_task);
CaptureRegions.push_back(OMPD_target);
break;
case OMPD_teams_loop:
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
CaptureRegions.push_back(OMPD_teams);
@ -815,6 +819,7 @@ void clang::getOpenMPCaptureRegions(
CaptureRegions.push_back(OMPD_parallel);
CaptureRegions.push_back(OMPD_taskloop);
break;
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
CaptureRegions.push_back(OMPD_task);
@ -822,9 +827,6 @@ void clang::getOpenMPCaptureRegions(
CaptureRegions.push_back(OMPD_teams);
CaptureRegions.push_back(OMPD_parallel);
break;
case OMPD_teams_loop:
CaptureRegions.push_back(OMPD_teams);
break;
case OMPD_nothing:
CaptureRegions.push_back(OMPD_nothing);
break;

View File

@ -2812,7 +2812,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
const StaticRTInput &Values) {
OpenMPSchedType ScheduleNum = getRuntimeSchedule(
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
assert(isOpenMPWorksharingDirective(DKind) &&
assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
"Expected loop-based or sections-based directive.");
llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
isOpenMPLoopDirective(DKind)
@ -6206,6 +6206,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
DefaultVal = -1;
return nullptr;
}
case OMPD_target_teams_loop:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
@ -6225,12 +6226,14 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_parallel_loop:
case OMPD_target_simd:
DefaultVal = 1;
return nullptr;
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
case OMPD_parallel_loop:
case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
@ -6447,6 +6450,8 @@ const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
return ThreadLimit;
}
return nullptr;
case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
@ -6649,6 +6654,8 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
return NumThreads;
return Bld.getInt32(0);
case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
@ -9072,7 +9079,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
switch (D.getDirectiveKind()) {
case OMPD_target:
if (isOpenMPDistributeDirective(DKind))
// For now, just treat 'target teams loop' as if it's distributed.
if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
return NestedDir;
if (DKind == OMPD_teams) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
@ -9556,7 +9564,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
// Get nested teams distribute kind directive, if any.
if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
Kind != OMPD_target_teams_loop)
TD = getNestedDistributeDirective(CGM.getContext(), D);
if (!TD)
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
@ -9945,6 +9954,14 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
CGM, ParentName,
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
break;
case OMPD_target_teams_loop:
CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
break;
case OMPD_target_parallel_loop:
CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
break;
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:

View File

@ -653,6 +653,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_target:
case OMPD_target_teams:
return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:

View File

@ -416,16 +416,19 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S));
break;
case Stmt::OMPTeamsGenericLoopDirectiveClass:
llvm_unreachable("teams loop directive not supported yet.");
EmitOMPTeamsGenericLoopDirective(cast<OMPTeamsGenericLoopDirective>(*S));
break;
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
llvm_unreachable("target teams loop directive not supported yet.");
EmitOMPTargetTeamsGenericLoopDirective(
cast<OMPTargetTeamsGenericLoopDirective>(*S));
break;
case Stmt::OMPParallelGenericLoopDirectiveClass:
llvm_unreachable("parallel loop directive not supported yet.");
EmitOMPParallelGenericLoopDirective(
cast<OMPParallelGenericLoopDirective>(*S));
break;
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
llvm_unreachable("target parallel loop directive not supported yet.");
EmitOMPTargetParallelGenericLoopDirective(
cast<OMPTargetParallelGenericLoopDirective>(*S));
break;
case Stmt::OMPParallelMaskedDirectiveClass:
EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));

View File

@ -7852,6 +7852,148 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
const OMPLoopDirective &S) {
// Emit combined directive as if its consituent constructs are 'parallel'
// and 'for'.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
emitOMPCopyinClause(CGF, S);
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
emitEmptyBoundParameters);
}
// Check for outer lastprivate conditional update.
checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
const OMPTeamsGenericLoopDirective &S) {
// To be consistent with current behavior of 'target teams loop', emit
// 'teams loop' as if its constituent constructs are 'distribute,
// 'parallel, and 'for'.
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
};
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
emitPostUpdateForReductionClause(*this, S,
[](CodeGenFunction &) { return nullptr; });
}
static void
emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
const OMPTargetTeamsGenericLoopDirective &S,
PrePostActionTy &Action) {
Action.Enter(CGF);
// Emit 'teams loop' as if its constituent constructs are 'distribute,
// 'parallel, and 'for'.
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
};
// Emit teams region as a standalone region.
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
PrePostActionTy &Action) {
Action.Enter(CGF);
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
CodeGenTeams);
emitPostUpdateForReductionClause(CGF, S,
[](CodeGenFunction &) { return nullptr; });
}
/// Emit combined directive 'target teams loop' as if its constituent
/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
const OMPTargetTeamsGenericLoopDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetTeamsGenericLoopRegion(CGF, S, Action);
};
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsGenericLoopDirective &S) {
// Emit SPMD target parallel loop region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetTeamsGenericLoopRegion(CGF, S, Action);
};
llvm::Function *Fn;
llvm::Constant *Addr;
// Emit target region as a standalone region.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
assert(Fn && Addr &&
"Target device function emission failed for 'target teams loop'.");
}
static void emitTargetParallelGenericLoopRegion(
CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
PrePostActionTy &Action) {
Action.Enter(CGF);
// Emit as 'parallel for'.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CodeGenFunction::OMPCancelStackRAII CancelRegion(
CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelGenericLoopDirective &S) {
// Emit target parallel loop region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetParallelGenericLoopRegion(CGF, S, Action);
};
llvm::Function *Fn;
llvm::Constant *Addr;
// Emit target region as a standalone region.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
assert(Fn && Addr && "Target device function emission failed.");
}
/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
const OMPTargetParallelGenericLoopDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetParallelGenericLoopRegion(CGF, S, Action);
};
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
const OMPExecutableDirective &D) {
if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {

View File

@ -3580,6 +3580,12 @@ public:
void EmitOMPTargetTeamsDistributeSimdDirective(
const OMPTargetTeamsDistributeSimdDirective &S);
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S);
void EmitOMPTargetParallelGenericLoopDirective(
const OMPTargetParallelGenericLoopDirective &S);
void EmitOMPTargetTeamsGenericLoopDirective(
const OMPTargetTeamsGenericLoopDirective &S);
void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S);
void EmitOMPInteropDirective(const OMPInteropDirective &S);
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);
@ -3620,6 +3626,16 @@ public:
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
/// Emit device code for the target teams loop directive.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsGenericLoopDirective &S);
/// Emit device code for the target parallel loop directive.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelGenericLoopDirective &S);
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeParallelForDirective &S);

View File

@ -4199,7 +4199,6 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd: {
@ -4448,6 +4447,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
@ -4506,22 +4506,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
break;
}
case OMPD_teams_loop: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
Sema::CapturedParamNameType ParamsTeams[] = {
std::make_pair(".global_tid.", KmpInt32PtrTy),
std::make_pair(".bound_tid.", KmpInt32PtrTy),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
// Start a captured region for 'teams'.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
ParamsTeams, /*OpenMPCaptureLevel=*/0);
break;
}
case OMPD_teams_loop:
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
@ -15381,6 +15366,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
break;
}
[[fallthrough]];
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
// If this clause applies to the nested 'parallel' region, capture within
// the 'teams' region, otherwise do not capture.
@ -15473,7 +15459,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_target:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_loop:
case OMPD_distribute_parallel_for:
case OMPD_task:
case OMPD_taskloop:

View File

@ -0,0 +1,117 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]"
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
void foo(int t) {
int i, j, z;
#pragma omp loop collapse(2) reduction(+:z) lastprivate(j) bind(thread)
for (int i = 0; i<t; ++i)
for (j = 0; j<t; ++j)
z += i+j;
}
#endif
// IR-LABEL: define {{[^@]+}}@_Z3fooi
// IR-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-NEXT: [[Z:%.*]] = alloca i32, align 4
// IR-NEXT: [[I1:%.*]] = alloca i32, align 4
// IR-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
// IR-NEXT: store i32 0, ptr [[I1]], align 4
// IR-NEXT: br label [[FOR_COND:%.*]]
// IR: for.cond:
// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
// IR: for.body:
// IR-NEXT: store i32 0, ptr [[J]], align 4
// IR-NEXT: br label [[FOR_COND2:%.*]]
// IR: for.cond2:
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
// IR-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
// IR: for.body4:
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
// IR-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
// IR-NEXT: br label [[FOR_INC:%.*]]
// IR: for.inc:
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
// IR-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
// IR: for.end:
// IR-NEXT: br label [[FOR_INC6:%.*]]
// IR: for.inc6:
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
// IR-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
// IR-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// IR: for.end8:
// IR-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3fooi
// IR-PCH-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[Z:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I1:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[I1]], align 4
// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
// IR-PCH: for.cond:
// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
// IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
// IR-PCH-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
// IR-PCH: for.body:
// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
// IR-PCH-NEXT: br label [[FOR_COND2:%.*]]
// IR-PCH: for.cond2:
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
// IR-PCH-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
// IR-PCH-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
// IR-PCH: for.body4:
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
// IR-PCH-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
// IR-PCH-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
// IR-PCH: for.inc:
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
// IR-PCH-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
// IR-PCH: for.end:
// IR-PCH-NEXT: br label [[FOR_INC6:%.*]]
// IR-PCH: for.inc6:
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
// IR-PCH-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
// IR-PCH-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// IR-PCH: for.end8:
// IR-PCH-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,525 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
int a;
int foo(int *a);
int main(int argc, char **argv) {
#pragma omp target teams loop map(tofrom:a) if(target:argc)
for (int i= 0; i < argc; ++i)
a = foo(&i) + foo(&a) + foo(&argc);
return 0;
}
#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK1-SAME: (i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK2-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]]
// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]]
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
//

View File

@ -0,0 +1,224 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
#define N 64
int foo() {
int x = 0;
int result[N] = {0};
#pragma omp parallel loop num_threads(N) allocate(x) private(x) collapse(2)
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
result[i] = i + j + x;
return 0;
}
#endif
// IR-LABEL: define {{[^@]+}}@_Z3foov
// IR-SAME: () #[[ATTR0:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[X:%.*]] = alloca i32, align 4
// IR-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16
// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// IR-NEXT: store i32 0, ptr [[X]], align 4
// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false)
// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]])
// IR-NEXT: ret i32 0
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null)
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR: cond.true:
// IR-NEXT: br label [[COND_END:%.*]]
// IR: cond.false:
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: br label [[COND_END]]
// IR: cond.end:
// IR-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR: omp.inner.for.cond:
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// IR-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// IR: omp.inner.for.cond.cleanup:
// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// IR: omp.inner.for.body:
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64
// IR-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64
// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// IR-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
// IR-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
// IR-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]]
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// IR-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR: omp.body.continue:
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR: omp.inner.for.inc:
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1
// IR-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR: omp.inner.for.end:
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR: omp.loop.exit:
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null)
// IR-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16
// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4
// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false)
// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]])
// IR-PCH-NEXT: ret i32 0
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null)
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-PCH: cond.true:
// IR-PCH-NEXT: br label [[COND_END:%.*]]
// IR-PCH: cond.false:
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: br label [[COND_END]]
// IR-PCH: cond.end:
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-PCH: omp.inner.for.cond:
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// IR-PCH-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// IR-PCH: omp.inner.for.cond.cleanup:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// IR-PCH: omp.inner.for.body:
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64
// IR-PCH-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64
// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// IR-PCH-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// IR-PCH-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
// IR-PCH-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
// IR-PCH-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]]
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// IR-PCH-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR-PCH: omp.body.continue:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-PCH: omp.inner.for.inc:
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1
// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR-PCH: omp.inner.for.end:
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null)
// IR-PCH-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,952 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
// Test host codegen.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-X86
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK-X86
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s
// Test target parallel for codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
int nested(int a){
#pragma omp target parallel loop
for (int i = 0; i < 10; ++i)
++a;
auto F = [&](){
#pragma omp parallel
{
#pragma omp target parallel loop
for (int i = 0; i < 10; ++i)
++a;
}
};
F();
return a;
}
// Check metadata is properly generated:
#endif
// CHECK-LABEL: define dso_local noundef signext i32 @_Z6nestedi
// CHECK-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-NEXT: store i32 2, ptr [[TMP7]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-NEXT: store ptr null, ptr [[TMP13]], align 8
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-NEXT: store i64 0, ptr [[TMP15]], align 8
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4
// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]])
// CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK: omp_offload.failed:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i64 [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK: omp_offload.cont:
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 8
// CHECK-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: ret i32 [[TMP23]]
//
//
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define internal void @.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NEXT: ret void
//
//
// CHECK-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-X86-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-X86-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
// CHECK-X86-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
// CHECK-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
// CHECK-X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-X86-NEXT: store ptr null, ptr [[TMP4]], align 4
// CHECK-X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-X86-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-X86-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-X86-NEXT: store i32 2, ptr [[TMP7]], align 4
// CHECK-X86-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-X86-NEXT: store i32 1, ptr [[TMP8]], align 4
// CHECK-X86-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-X86-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
// CHECK-X86-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-X86-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
// CHECK-X86-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-X86-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
// CHECK-X86-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-X86-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
// CHECK-X86-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-X86-NEXT: store ptr null, ptr [[TMP13]], align 4
// CHECK-X86-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-X86-NEXT: store ptr null, ptr [[TMP14]], align 4
// CHECK-X86-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-X86-NEXT: store i64 0, ptr [[TMP15]], align 8
// CHECK-X86-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-X86-NEXT: store i64 0, ptr [[TMP16]], align 8
// CHECK-X86-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-X86-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4
// CHECK-X86-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-X86-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
// CHECK-X86-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-X86-NEXT: store i32 0, ptr [[TMP19]], align 4
// CHECK-X86-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]])
// CHECK-X86-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK-X86-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-X86: omp_offload.failed:
// CHECK-X86-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-X86-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-X86: omp_offload.cont:
// CHECK-X86-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// CHECK-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 4
// CHECK-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
// CHECK-X86-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: ret i32 [[TMP23]]
//
//
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]])
// CHECK-X86-NEXT: ret void
//
//
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-X86: cond.true:
// CHECK-X86-NEXT: br label [[COND_END:%.*]]
// CHECK-X86: cond.false:
// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: br label [[COND_END]]
// CHECK-X86: cond.end:
// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-X86: omp.inner.for.cond:
// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-X86: omp.inner.for.body:
// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-X86: omp.body.continue:
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-X86: omp.inner.for.inc:
// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-X86: omp.inner.for.end:
// CHECK-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-X86: omp.loop.exit:
// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-X86-NEXT: ret void
//
//
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]])
// CHECK-X86-NEXT: ret void
//
//
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-X86: cond.true:
// CHECK-X86-NEXT: br label [[COND_END:%.*]]
// CHECK-X86: cond.false:
// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: br label [[COND_END]]
// CHECK-X86: cond.end:
// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-X86: omp.inner.for.cond:
// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-X86: omp.inner.for.body:
// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-X86: omp.body.continue:
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-X86: omp.inner.for.inc:
// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-X86: omp.inner.for.end:
// CHECK-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-X86: omp.loop.exit:
// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-X86-NEXT: ret void
//
//
// CHECK-X86-LABEL: define internal void @.omp_offloading.requires_reg
// CHECK-X86-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK-X86-NEXT: entry:
// CHECK-X86-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-X86-NEXT: ret void
//
//
// SIMD-ONLY0-LABEL: define dso_local noundef signext i32 @_Z6nestedi
// SIMD-ONLY0-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// SIMD-ONLY0-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// SIMD-ONLY0-NEXT: store i32 0, ptr [[I]], align 4
// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]]
// SIMD-ONLY0: for.cond:
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// SIMD-ONLY0: for.body:
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY0-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]]
// SIMD-ONLY0: for.inc:
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY0-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY0-NEXT: store i32 [[INC1]], ptr [[I]], align 4
// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
// SIMD-ONLY0: for.end:
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// SIMD-ONLY0-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8
// SIMD-ONLY0-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY0-NEXT: ret i32 [[TMP4]]
//
//
// SIMD-ONLY0-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
// SIMD-ONLY0-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY0-X86-NEXT: entry:
// SIMD-ONLY0-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// SIMD-ONLY0-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
// SIMD-ONLY0-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// SIMD-ONLY0-X86-NEXT: store i32 0, ptr [[I]], align 4
// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND:%.*]]
// SIMD-ONLY0-X86: for.cond:
// SIMD-ONLY0-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY0-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
// SIMD-ONLY0-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// SIMD-ONLY0-X86: for.body:
// SIMD-ONLY0-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY0-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY0-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// SIMD-ONLY0-X86-NEXT: br label [[FOR_INC:%.*]]
// SIMD-ONLY0-X86: for.inc:
// SIMD-ONLY0-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY0-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY0-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4
// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// SIMD-ONLY0-X86: for.end:
// SIMD-ONLY0-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// SIMD-ONLY0-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4
// SIMD-ONLY0-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
// SIMD-ONLY0-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY0-X86-NEXT: ret i32 [[TMP4]]
//
//
// TCHECK-TARGET-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-TARGET-NEXT: entry:
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]])
// TCHECK-TARGET-NEXT: ret void
//
//
// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// TCHECK-TARGET-NEXT: entry:
// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-TARGET: cond.true:
// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]]
// TCHECK-TARGET: cond.false:
// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: br label [[COND_END]]
// TCHECK-TARGET: cond.end:
// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-TARGET: omp.inner.for.cond:
// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-TARGET: omp.inner.for.body:
// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-TARGET: omp.body.continue:
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-TARGET: omp.inner.for.inc:
// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-TARGET: omp.inner.for.end:
// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-TARGET: omp.loop.exit:
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// TCHECK-TARGET-NEXT: ret void
//
//
// TCHECK-TARGET-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] {
// TCHECK-TARGET-NEXT: entry:
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]])
// TCHECK-TARGET-NEXT: ret void
//
//
// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] {
// TCHECK-TARGET-NEXT: entry:
// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-TARGET: cond.true:
// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]]
// TCHECK-TARGET: cond.false:
// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: br label [[COND_END]]
// TCHECK-TARGET: cond.end:
// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-TARGET: omp.inner.for.cond:
// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-TARGET: omp.inner.for.body:
// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-TARGET: omp.body.continue:
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-TARGET: omp.inner.for.inc:
// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-TARGET: omp.inner.for.end:
// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-TARGET: omp.loop.exit:
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// TCHECK-TARGET-NEXT: ret void
//
//
// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-TARGET-X86-NEXT: entry:
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]])
// TCHECK-TARGET-X86-NEXT: ret void
//
//
// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// TCHECK-TARGET-X86-NEXT: entry:
// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-TARGET-X86: cond.true:
// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]]
// TCHECK-TARGET-X86: cond.false:
// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[COND_END]]
// TCHECK-TARGET-X86: cond.end:
// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.cond:
// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.body:
// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-TARGET-X86: omp.body.continue:
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.inc:
// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-TARGET-X86: omp.inner.for.end:
// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-TARGET-X86: omp.loop.exit:
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// TCHECK-TARGET-X86-NEXT: ret void
//
//
// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] {
// TCHECK-TARGET-X86-NEXT: entry:
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]])
// TCHECK-TARGET-X86-NEXT: ret void
//
//
// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
// TCHECK-TARGET-X86-NEXT: entry:
// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-TARGET-X86: cond.true:
// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]]
// TCHECK-TARGET-X86: cond.false:
// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[COND_END]]
// TCHECK-TARGET-X86: cond.end:
// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.cond:
// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.body:
// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-TARGET-X86: omp.body.continue:
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-TARGET-X86: omp.inner.for.inc:
// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-TARGET-X86: omp.inner.for.end:
// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-TARGET-X86: omp.loop.exit:
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// TCHECK-TARGET-X86-NEXT: ret void
//
//
// SIMD-ONLY1-TARGET-LABEL: define dso_local noundef signext i32 @_Z6nestedi
// SIMD-ONLY1-TARGET-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY1-TARGET-NEXT: entry:
// SIMD-ONLY1-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
// SIMD-ONLY1-TARGET-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// SIMD-ONLY1-TARGET-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-NEXT: store i32 0, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND:%.*]]
// SIMD-ONLY1-TARGET: for.cond:
// SIMD-ONLY1-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
// SIMD-ONLY1-TARGET-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// SIMD-ONLY1-TARGET: for.body:
// SIMD-ONLY1-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_INC:%.*]]
// SIMD-ONLY1-TARGET: for.inc:
// SIMD-ONLY1-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC1]], ptr [[I]], align 4
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
// SIMD-ONLY1-TARGET: for.end:
// SIMD-ONLY1-TARGET-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// SIMD-ONLY1-TARGET-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8
// SIMD-ONLY1-TARGET-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
// SIMD-ONLY1-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-NEXT: ret i32 [[TMP4]]
//
//
// SIMD-ONLY1-TARGET-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
// SIMD-ONLY1-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY1-TARGET-X86-NEXT: entry:
// SIMD-ONLY1-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
// SIMD-ONLY1-TARGET-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 0, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND:%.*]]
// SIMD-ONLY1-TARGET-X86: for.cond:
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
// SIMD-ONLY1-TARGET-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// SIMD-ONLY1-TARGET-X86: for.body:
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_INC:%.*]]
// SIMD-ONLY1-TARGET-X86: for.inc:
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// SIMD-ONLY1-TARGET-X86: for.end:
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
// SIMD-ONLY1-TARGET-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// SIMD-ONLY1-TARGET-X86-NEXT: ret i32 [[TMP4]]
//

View File

@ -0,0 +1,921 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s --check-prefix=CHECK1
// expected-no-diagnostics
int main() {
/* int(*b)[a]; */
/* int *(**c)[a]; */
bool bb;
int a;
int b[10][10];
int c[10][10][10];
#pragma omp target parallel loop firstprivate(a, b) map(tofrom \
: c) map(tofrom \
: bb) if (a)
for (int i = 0; i < 10; ++i) {
int &f = c[1][1][1];
int &g = a;
int &h = b[1][1];
int d = 15;
a = 5;
b[0][a] = 10;
c[0][0][a] = 11;
b[0][a] = c[0][0][a];
bb |= b[0][a];
}
#pragma omp target parallel loop firstprivate(a) map(tofrom \
: c, b) map(to \
: bb)
for (int i = 0; i < 10; ++i) {
int &f = c[1][1][1];
int &g = a;
int &h = b[1][1];
int d = 15;
a = 5;
b[0][a] = 10;
c[0][0][a] = 11;
b[0][a] = c[0][0][a];
d = bb;
}
#pragma omp target parallel loop map(tofrom \
: a, c, b) map(from \
: bb)
for (int i = 0; i < 10; ++i) {
int &f = c[1][1][1];
int &g = a;
int &h = b[1][1];
int d = 15;
a = 5;
b[0][a] = 10;
c[0][0][a] = 11;
b[0][a] = c[0][0][a];
bb = b[0][a];
}
return 0;
}
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]]
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG41:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]])
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]]
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]]
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG65]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG65]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]]
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]]
// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG79]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]]
// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG82]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]]
// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG84]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]]
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]]
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]]
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]]
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]]
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]]
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]]
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]]
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]]
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]]
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]]
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]]
// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]]
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]]
// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]]
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]]
// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]]
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]]
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]]
// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]]
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]]
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]]
// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG122]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG122]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG122]]
// CHECK1-NEXT: ret void, !dbg [[DBG122]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG132]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG132]]
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG132]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]]
// CHECK1-NEXT: ret void, !dbg [[DBG132]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]]
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG165]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG165]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG165]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG168]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG174]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]]
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]]
// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG177]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]]
// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG180]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]]
// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG182]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]]
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG185]]
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]]
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG190:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]]
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]]
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]]
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]]
// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]]
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG193]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]]
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]]
// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]]
// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]]
// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]]
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]]
// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG199:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]]
// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]]
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]]
// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]]
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG213]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]]
// CHECK1-NEXT: ret void, !dbg [[DBG213]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]]
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG222]]
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG222]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]]
// CHECK1-NEXT: ret void, !dbg [[DBG222]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]]
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]]
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG258]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG264]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]]
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]]
// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG267]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG270]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]]
// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG272]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]]
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG275]]
// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]]
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]]
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]]
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]]
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG283:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]]
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]]
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG283]]
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]]
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]]
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]]
// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]]
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG289:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]]
// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]]
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG288]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]]
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG290]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]]
// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG263]]
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]]
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]]
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG306]]
// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG306]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]]
// CHECK1-NEXT: ret void, !dbg [[DBG306]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]]
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG315]]
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG315]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]]
// CHECK1-NEXT: ret void, !dbg [[DBG315]]
//

View File

@ -0,0 +1,387 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
typedef void **omp_allocator_handle_t;
extern const omp_allocator_handle_t omp_null_allocator;
extern const omp_allocator_handle_t omp_default_mem_alloc;
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
extern const omp_allocator_handle_t omp_const_mem_alloc;
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
extern const omp_allocator_handle_t omp_thread_mem_alloc;
extern int omp_get_thread_num(void);
#define N 64
int main() {
int x = 0;
int device_result[N] = {0};
#pragma omp target parallel loop num_threads(N) uses_allocators(omp_pteam_mem_alloc) allocate(omp_pteam_mem_alloc: x) private(x) map(from: device_result)
for (int i = 0; i < N; i++) {
x = omp_get_thread_num();
device_result[i] = i + x;
}
}
#endif
// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
// IR-GPU-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR0:[0-9]+]] {
// IR-GPU-NEXT: entry:
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// IR-GPU: user_code.entry:
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8
// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// IR-GPU-NEXT: ret void
// IR-GPU: worker.exit:
// IR-GPU-NEXT: ret void
//
//
// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined
// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR1:[0-9]+]] {
// IR-GPU-NEXT: entry:
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr
// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
// IR-GPU-NEXT: store i32 63, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP2]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// IR-GPU: omp.dispatch.cond:
// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 63
// IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-GPU: cond.true:
// IR-GPU-NEXT: br label [[COND_END:%.*]]
// IR-GPU: cond.false:
// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: br label [[COND_END]]
// IR-GPU: cond.end:
// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// IR-GPU-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// IR-GPU: omp.dispatch.body:
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-GPU: omp.inner.for.cond:
// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// IR-GPU-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR-GPU: omp.inner.for.body:
// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() #[[ATTR5:[0-9]+]]
// IR-GPU-NEXT: store i32 [[CALL]], ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4
// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4
// IR-GPU-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// IR-GPU-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR-GPU: omp.body.continue:
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-GPU: omp.inner.for.inc:
// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
// IR-GPU-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR-GPU: omp.inner.for.end:
// IR-GPU-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// IR-GPU: omp.dispatch.inc:
// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// IR-GPU-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
// IR-GPU-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
// IR-GPU-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND]]
// IR-GPU: omp.dispatch.end:
// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP2]])
// IR-GPU-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@main
// IR-SAME: () #[[ATTR0:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[X:%.*]] = alloca i32, align 4
// IR-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16
// IR-NEXT: store i32 0, ptr [[X]], align 4
// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false)
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
// IR-NEXT: ret i32 0
//
//
// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
// IR-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]])
// IR-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]])
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR: cond.true:
// IR-NEXT: br label [[COND_END:%.*]]
// IR: cond.false:
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: br label [[COND_END]]
// IR: cond.end:
// IR-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR: omp.inner.for.cond:
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// IR-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// IR: omp.inner.for.cond.cleanup:
// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// IR: omp.inner.for.body:
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv()
// IR-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
// IR-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// IR-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR: omp.body.continue:
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR: omp.inner.for.inc:
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1
// IR-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR: omp.inner.for.end:
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR: omp.loop.exit:
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// IR-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]])
// IR-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@main
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16
// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4
// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false)
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
// IR-PCH-NEXT: ret i32 0
//
//
// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
// IR-PCH-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]])
// IR-PCH-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]])
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-PCH: cond.true:
// IR-PCH-NEXT: br label [[COND_END:%.*]]
// IR-PCH: cond.false:
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: br label [[COND_END]]
// IR-PCH: cond.end:
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-PCH: omp.inner.for.cond:
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// IR-PCH-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// IR-PCH: omp.inner.for.cond.cleanup:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// IR-PCH: omp.inner.for.body:
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv()
// IR-PCH-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
// IR-PCH-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// IR-PCH-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR-PCH: omp.body.continue:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-PCH: omp.inner.for.inc:
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1
// IR-PCH-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR-PCH: omp.inner.for.end:
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// IR-PCH-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]])
// IR-PCH-NEXT: ret void
//

View File

@ -0,0 +1,210 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Check target registration is registered as a Ctor.
template<typename tx, typename ty>
struct TT{
tx X;
ty Y;
};
int global;
extern int global;
int foo(int n) {
int a = 0;
short aa = 0;
float b[10];
float bn[n];
double c[5][10];
double cn[5][n];
TT<long long, char> d;
static long *plocal;
#pragma omp target parallel loop device(global + a) depend(in: global) depend(out: a, b, cn[4])
for (int i = 0; i < 10; ++i) {
}
#pragma omp target parallel loop device(global + a) nowait depend(inout: global, a, bn) if(target:a)
for (int i = 0; i < *plocal; ++i) {
static int local1;
*plocal = global;
local1 = global;
}
#pragma omp target parallel loop if(0) firstprivate(global) depend(out:global)
for (int i = 0; i < global; ++i) {
global += 1;
}
return a;
}
// Check that the offloading functions are emitted and that the arguments are
// correct and loaded correctly for the target regions in foo().
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
// Create stack storage and store argument in there.
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
// CHECK-64: load i32, i32* [[AA_CADDR]], align
// CHECK-32: load i32, i32* [[AA_ADDR]], align
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
#endif
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined)
// CHECK-NEXT: ret void
//
//
//
//
//
//
//
//
//
//
//
//
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NEXT: ret void
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// TCHECK-SAME: () #[[ATTR0:[0-9]+]] {
// TCHECK-NEXT: entry:
// TCHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined)
// TCHECK-NEXT: ret void
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// SIMD-ONLY0: {{.*}}
// SIMD-ONLY1: {{.*}}

View File

@ -0,0 +1,211 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
};
typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
omp_atk_alignment = 2,
omp_atk_access = 3,
omp_atk_pool_size = 4,
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
omp_atk_partition = 8
} omp_alloctrait_key_t;
typedef enum omp_alloctrait_value_t {
omp_atv_false = 0,
omp_atv_true = 1,
omp_atv_default = 2,
omp_atv_contended = 3,
omp_atv_uncontended = 4,
omp_atv_sequential = 5,
omp_atv_private = 6,
omp_atv_all = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
omp_atv_default_mem_fb = 11,
omp_atv_null_fb = 12,
omp_atv_abort_fb = 13,
omp_atv_allocator_fb = 14,
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
omp_atv_interleaved = 18
} omp_alloctrait_value_t;
typedef struct omp_alloctrait_t {
omp_alloctrait_key_t key;
__UINTPTR_TYPE__ value;
} omp_alloctrait_t;
// Just map the traits variable as a firstprivate variable.
void foo() {
omp_alloctrait_t traits[10];
omp_allocator_handle_t my_allocator;
#pragma omp target parallel loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
for (int i = 0; i < 10; ++i)
;
}
// Destroy allocator upon exit from the region.
#endif
// CHECK-LABEL: define {{[^@]+}}@_Z3foov
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-NEXT: store i64 0, ptr [[TMP13]], align 8
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP15]], align 4
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]])
// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK: omp_offload.failed:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK: omp_offload.cont:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]])
// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64
// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined)
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,207 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// REQUIRES: powerpc-registered-target
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
void gtid_test() {
#pragma omp target teams loop order(concurrent)
for(int i = 0 ; i < 100; i++) {}
}
#endif
// CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16() #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16
// CHECK1-SAME: () #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-SAME: () #[[ATTR3:[0-9]+]] section ".text.startup" {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,482 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
};
typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
omp_atk_alignment = 2,
omp_atk_access = 3,
omp_atk_pool_size = 4,
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
omp_atk_partition = 8
} omp_alloctrait_key_t;
typedef enum omp_alloctrait_value_t {
omp_atv_false = 0,
omp_atv_true = 1,
omp_atv_default = 2,
omp_atv_contended = 3,
omp_atv_uncontended = 4,
omp_atv_sequential = 5,
omp_atv_private = 6,
omp_atv_all = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
omp_atv_default_mem_fb = 11,
omp_atv_null_fb = 12,
omp_atv_abort_fb = 13,
omp_atv_allocator_fb = 14,
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
omp_atv_interleaved = 18
} omp_alloctrait_value_t;
typedef struct omp_alloctrait_t {
omp_alloctrait_key_t key;
__UINTPTR_TYPE__ value;
} omp_alloctrait_t;
// Just map the traits variable as a firstprivate variable.
void foo() {
omp_alloctrait_t traits[10];
omp_allocator_handle_t my_allocator;
#pragma omp target teams loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
for (int i = 0; i < 10; ++i)
;
}
// Destroy allocator upon exit from the region.
#endif
// CHECK-64-LABEL: define {{[^@]+}}@_Z3foov
// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, ptr [[TMP5]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT: store i32 1, ptr [[TMP6]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT: store i64 10, ptr [[TMP13]], align 8
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, ptr [[TMP14]], align 8
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, ptr [[TMP17]], align 4
// CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73.region_id, ptr [[KERNEL_ARGS]])
// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-64-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64: omp_offload.failed:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK-64: omp_offload.cont:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73
// CHECK-64-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP2]])
// CHECK-64-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP3]] to i64
// CHECK-64-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.)
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to ptr
// CHECK-64-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]])
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-64-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-64-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@_Z3foov
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-NEXT: store i64 10, ptr [[TMP13]], align 8
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]])
// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK: omp_offload.failed:
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK: omp_offload.cont:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]])
// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64
// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined)
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,770 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
int foo() {
int i;
int j;
int sum[10][10];
#pragma omp teams loop reduction(+:sum) collapse(2) bind(parallel) \
order(concurrent) lastprivate(j)
for(i=0; i<10; i++)
for(j=0; j<10; j++)
sum[i][j] += i;
return 0;
}
#endif
// IR-LABEL: define {{[^@]+}}@_Z3foov
// IR-SAME: () #[[ATTR0:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]])
// IR-NEXT: ret i32 0
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J4:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0
// IR-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// IR: omp.arrayinit.body:
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR: omp.arrayinit.done:
// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR: cond.true:
// IR-NEXT: br label [[COND_END:%.*]]
// IR: cond.false:
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: br label [[COND_END]]
// IR: cond.end:
// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR: omp.inner.for.cond:
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR: omp.inner.for.body:
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR: omp.inner.for.inc:
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR: omp.inner.for.end:
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR: omp.loop.exit:
// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR: .omp.lastprivate.then:
// IR-NEXT: store i32 10, ptr [[J3]], align 4
// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR: .omp.lastprivate.done:
// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-NEXT: ]
// IR: .omp.reduction.case1:
// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR: omp.arraycpy.body:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done10:
// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.case2:
// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
// IR: omp.arraycpy.body12:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
// IR: omp.arraycpy.done18:
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.default:
// IR-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J5:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// IR: omp.arrayinit.body:
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR: omp.arrayinit.done:
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR: cond.true:
// IR-NEXT: br label [[COND_END:%.*]]
// IR: cond.false:
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: br label [[COND_END]]
// IR: cond.end:
// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR: omp.inner.for.cond:
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR: omp.inner.for.body:
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR: omp.body.continue:
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR: omp.inner.for.inc:
// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// IR: omp.inner.for.end:
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR: omp.loop.exit:
// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-NEXT: ]
// IR: .omp.reduction.case1:
// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR: omp.arraycpy.body:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done19:
// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.case2:
// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
// IR: omp.arraycpy.body21:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
// IR: omp.arraycpy.done27:
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.default:
// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR: .omp.lastprivate.then:
// IR-NEXT: store i32 10, ptr [[J3]], align 4
// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR: .omp.lastprivate.done:
// IR-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR: omp.arraycpy.body:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done2:
// IR-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR: omp.arraycpy.body:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done2:
// IR-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]])
// IR-PCH-NEXT: ret i32 0
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0
// IR-PCH-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// IR-PCH: omp.arrayinit.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR-PCH: omp.arrayinit.done:
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-PCH: cond.true:
// IR-PCH-NEXT: br label [[COND_END:%.*]]
// IR-PCH: cond.false:
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: br label [[COND_END]]
// IR-PCH: cond.end:
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-PCH: omp.inner.for.cond:
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR-PCH: omp.inner.for.body:
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-PCH: omp.inner.for.inc:
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR-PCH: omp.inner.for.end:
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR-PCH: .omp.lastprivate.then:
// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
// IR-PCH-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR-PCH: .omp.lastprivate.done:
// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-PCH-NEXT: ]
// IR-PCH: .omp.reduction.case1:
// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR-PCH: omp.arraycpy.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done10:
// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
// IR-PCH: omp.arraycpy.body12:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
// IR-PCH: omp.arraycpy.done18:
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
// IR-PCH-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// IR-PCH: omp.arrayinit.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR-PCH: omp.arrayinit.done:
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-PCH: cond.true:
// IR-PCH-NEXT: br label [[COND_END:%.*]]
// IR-PCH: cond.false:
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: br label [[COND_END]]
// IR-PCH: cond.end:
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-PCH: omp.inner.for.cond:
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR-PCH: omp.inner.for.body:
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// IR-PCH: omp.body.continue:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-PCH: omp.inner.for.inc:
// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// IR-PCH: omp.inner.for.end:
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-PCH-NEXT: ]
// IR-PCH: .omp.reduction.case1:
// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR-PCH: omp.arraycpy.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done19:
// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
// IR-PCH: omp.arraycpy.body21:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
// IR-PCH: omp.arraycpy.done27:
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR-PCH: .omp.lastprivate.then:
// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR-PCH: .omp.lastprivate.done:
// IR-PCH-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR-PCH: omp.arraycpy.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done2:
// IR-PCH-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR-PCH: omp.arraycpy.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done2:
// IR-PCH-NEXT: ret void
//

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff