[OpenMP][CodeGen] Add codegen for combined 'loop' directives.
The loop directive is a descriptive construct which allows the compiler flexibility in how it generates code for the directive's associated loop(s). See OpenMP specification 5.2 [257:8-9]. This patch adds codegen for the combined 'loop' directives, mapping each one as follows: 'target teams loop' -> 'target teams distribute parallel for'; 'teams loop' -> 'teams distribute parallel for'; 'target parallel loop' -> 'target parallel for'; 'parallel loop' -> 'parallel for'. NOTE: The implementation of the 'loop' directive itself is unchanged. Differential Revision: https://reviews.llvm.org/D145823
This commit is contained in:
parent
73c12a8ffc
commit
eb61bde829
@ -2374,6 +2374,10 @@ OMPTeamsGenericLoopDirective *OMPTeamsGenericLoopDirective::Create(
|
||||
Dir->setNextLowerBound(Exprs.NLB);
|
||||
Dir->setNextUpperBound(Exprs.NUB);
|
||||
Dir->setNumIterations(Exprs.NumIterations);
|
||||
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
|
||||
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
|
||||
Dir->setDistInc(Exprs.DistInc);
|
||||
Dir->setPrevEnsureUpperBound(Exprs.PrevEUB);
|
||||
Dir->setCounters(Exprs.Counters);
|
||||
Dir->setPrivateCounters(Exprs.PrivateCounters);
|
||||
Dir->setInits(Exprs.Inits);
|
||||
@ -2383,6 +2387,15 @@ OMPTeamsGenericLoopDirective *OMPTeamsGenericLoopDirective::Create(
|
||||
Dir->setDependentInits(Exprs.DependentInits);
|
||||
Dir->setFinalsConditions(Exprs.FinalsConditions);
|
||||
Dir->setPreInits(Exprs.PreInits);
|
||||
Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB);
|
||||
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB);
|
||||
Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB);
|
||||
Dir->setCombinedInit(Exprs.DistCombinedFields.Init);
|
||||
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
|
||||
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
|
||||
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
|
||||
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
|
||||
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
|
||||
return Dir;
|
||||
}
|
||||
|
||||
@ -2418,6 +2431,10 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
|
||||
Dir->setNextLowerBound(Exprs.NLB);
|
||||
Dir->setNextUpperBound(Exprs.NUB);
|
||||
Dir->setNumIterations(Exprs.NumIterations);
|
||||
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
|
||||
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
|
||||
Dir->setDistInc(Exprs.DistInc);
|
||||
Dir->setPrevEnsureUpperBound(Exprs.PrevEUB);
|
||||
Dir->setCounters(Exprs.Counters);
|
||||
Dir->setPrivateCounters(Exprs.PrivateCounters);
|
||||
Dir->setInits(Exprs.Inits);
|
||||
@ -2427,6 +2444,15 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
|
||||
Dir->setDependentInits(Exprs.DependentInits);
|
||||
Dir->setFinalsConditions(Exprs.FinalsConditions);
|
||||
Dir->setPreInits(Exprs.PreInits);
|
||||
Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB);
|
||||
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB);
|
||||
Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB);
|
||||
Dir->setCombinedInit(Exprs.DistCombinedFields.Init);
|
||||
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
|
||||
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
|
||||
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
|
||||
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
|
||||
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
|
||||
return Dir;
|
||||
}
|
||||
|
||||
|
||||
@ -603,7 +603,9 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
|
||||
DKind == OMPD_teams_distribute_parallel_for_simd ||
|
||||
DKind == OMPD_teams_distribute_parallel_for ||
|
||||
DKind == OMPD_target_teams_distribute_parallel_for ||
|
||||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
|
||||
DKind == OMPD_target_teams_distribute_parallel_for_simd ||
|
||||
DKind == OMPD_parallel_loop || DKind == OMPD_teams_loop ||
|
||||
DKind == OMPD_target_parallel_loop || DKind == OMPD_target_teams_loop;
|
||||
}
|
||||
|
||||
bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
|
||||
@ -632,7 +634,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
|
||||
DKind == OMPD_parallel_master_taskloop_simd ||
|
||||
DKind == OMPD_parallel_masked_taskloop ||
|
||||
DKind == OMPD_parallel_masked_taskloop_simd ||
|
||||
DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop;
|
||||
DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop ||
|
||||
DKind == OMPD_teams_loop;
|
||||
}
|
||||
|
||||
bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
|
||||
@ -729,7 +732,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
|
||||
Kind == OMPD_teams_distribute_parallel_for_simd ||
|
||||
Kind == OMPD_teams_distribute_parallel_for ||
|
||||
Kind == OMPD_target_teams_distribute_parallel_for ||
|
||||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
|
||||
Kind == OMPD_target_teams_distribute_parallel_for_simd ||
|
||||
Kind == OMPD_teams_loop || Kind == OMPD_target_teams_loop;
|
||||
}
|
||||
|
||||
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
|
||||
@ -766,7 +770,6 @@ void clang::getOpenMPCaptureRegions(
|
||||
case OMPD_target_teams:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_distribute_simd:
|
||||
case OMPD_target_teams_loop:
|
||||
CaptureRegions.push_back(OMPD_task);
|
||||
CaptureRegions.push_back(OMPD_target);
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
@ -781,6 +784,7 @@ void clang::getOpenMPCaptureRegions(
|
||||
CaptureRegions.push_back(OMPD_task);
|
||||
CaptureRegions.push_back(OMPD_target);
|
||||
break;
|
||||
case OMPD_teams_loop:
|
||||
case OMPD_teams_distribute_parallel_for:
|
||||
case OMPD_teams_distribute_parallel_for_simd:
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
@ -815,6 +819,7 @@ void clang::getOpenMPCaptureRegions(
|
||||
CaptureRegions.push_back(OMPD_parallel);
|
||||
CaptureRegions.push_back(OMPD_taskloop);
|
||||
break;
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_teams_distribute_parallel_for:
|
||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||
CaptureRegions.push_back(OMPD_task);
|
||||
@ -822,9 +827,6 @@ void clang::getOpenMPCaptureRegions(
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
CaptureRegions.push_back(OMPD_parallel);
|
||||
break;
|
||||
case OMPD_teams_loop:
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
break;
|
||||
case OMPD_nothing:
|
||||
CaptureRegions.push_back(OMPD_nothing);
|
||||
break;
|
||||
|
||||
@ -2812,7 +2812,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
|
||||
const StaticRTInput &Values) {
|
||||
OpenMPSchedType ScheduleNum = getRuntimeSchedule(
|
||||
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
|
||||
assert(isOpenMPWorksharingDirective(DKind) &&
|
||||
assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
|
||||
"Expected loop-based or sections-based directive.");
|
||||
llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
|
||||
isOpenMPLoopDirective(DKind)
|
||||
@ -6206,6 +6206,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
|
||||
DefaultVal = -1;
|
||||
return nullptr;
|
||||
}
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_teams:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_distribute_simd:
|
||||
@ -6225,12 +6226,14 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
case OMPD_target_parallel_for_simd:
|
||||
case OMPD_target_parallel_loop:
|
||||
case OMPD_target_simd:
|
||||
DefaultVal = 1;
|
||||
return nullptr;
|
||||
case OMPD_parallel:
|
||||
case OMPD_for:
|
||||
case OMPD_parallel_for:
|
||||
case OMPD_parallel_loop:
|
||||
case OMPD_parallel_master:
|
||||
case OMPD_parallel_sections:
|
||||
case OMPD_for_simd:
|
||||
@ -6447,6 +6450,8 @@ const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
|
||||
return ThreadLimit;
|
||||
}
|
||||
return nullptr;
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_parallel_loop:
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
case OMPD_target_parallel_for_simd:
|
||||
@ -6649,6 +6654,8 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
|
||||
getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
|
||||
return NumThreads;
|
||||
return Bld.getInt32(0);
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_parallel_loop:
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
case OMPD_target_parallel_for_simd:
|
||||
@ -9072,7 +9079,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
|
||||
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
|
||||
switch (D.getDirectiveKind()) {
|
||||
case OMPD_target:
|
||||
if (isOpenMPDistributeDirective(DKind))
|
||||
// For now, just treat 'target teams loop' as if it's distributed.
|
||||
if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
|
||||
return NestedDir;
|
||||
if (DKind == OMPD_teams) {
|
||||
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
|
||||
@ -9556,7 +9564,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
|
||||
OpenMPDirectiveKind Kind = D.getDirectiveKind();
|
||||
const OMPExecutableDirective *TD = &D;
|
||||
// Get nested teams distribute kind directive, if any.
|
||||
if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
|
||||
if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
|
||||
Kind != OMPD_target_teams_loop)
|
||||
TD = getNestedDistributeDirective(CGM.getContext(), D);
|
||||
if (!TD)
|
||||
return llvm::ConstantInt::get(CGF.Int64Ty, 0);
|
||||
@ -9945,6 +9954,14 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
|
||||
CGM, ParentName,
|
||||
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
|
||||
break;
|
||||
case OMPD_target_teams_loop:
|
||||
CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
|
||||
CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
|
||||
break;
|
||||
case OMPD_target_parallel_loop:
|
||||
CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
|
||||
CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
|
||||
break;
|
||||
case OMPD_parallel:
|
||||
case OMPD_for:
|
||||
case OMPD_parallel_for:
|
||||
|
||||
@ -653,6 +653,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
|
||||
case OMPD_target:
|
||||
case OMPD_target_teams:
|
||||
return hasNestedSPMDDirective(Ctx, D);
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_parallel_loop:
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
case OMPD_target_parallel_for_simd:
|
||||
|
||||
@ -416,16 +416,19 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
|
||||
EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPTeamsGenericLoopDirectiveClass:
|
||||
llvm_unreachable("teams loop directive not supported yet.");
|
||||
EmitOMPTeamsGenericLoopDirective(cast<OMPTeamsGenericLoopDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
|
||||
llvm_unreachable("target teams loop directive not supported yet.");
|
||||
EmitOMPTargetTeamsGenericLoopDirective(
|
||||
cast<OMPTargetTeamsGenericLoopDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPParallelGenericLoopDirectiveClass:
|
||||
llvm_unreachable("parallel loop directive not supported yet.");
|
||||
EmitOMPParallelGenericLoopDirective(
|
||||
cast<OMPParallelGenericLoopDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
|
||||
llvm_unreachable("target parallel loop directive not supported yet.");
|
||||
EmitOMPTargetParallelGenericLoopDirective(
|
||||
cast<OMPTargetParallelGenericLoopDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPParallelMaskedDirectiveClass:
|
||||
EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));
|
||||
|
||||
@ -7852,6 +7852,148 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
|
||||
const OMPLoopDirective &S) {
|
||||
// Emit combined directive as if its consituent constructs are 'parallel'
|
||||
// and 'for'.
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
emitOMPCopyinClause(CGF, S);
|
||||
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
|
||||
};
|
||||
{
|
||||
auto LPCRegion =
|
||||
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
|
||||
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
|
||||
emitEmptyBoundParameters);
|
||||
}
|
||||
// Check for outer lastprivate conditional update.
|
||||
checkForLastprivateConditionalUpdate(*this, S);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
|
||||
const OMPTeamsGenericLoopDirective &S) {
|
||||
// To be consistent with current behavior of 'target teams loop', emit
|
||||
// 'teams loop' as if its constituent constructs are 'distribute,
|
||||
// 'parallel, and 'for'.
|
||||
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
||||
S.getDistInc());
|
||||
};
|
||||
|
||||
// Emit teams region as a standalone region.
|
||||
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
OMPPrivateScope PrivateScope(CGF);
|
||||
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
||||
(void)PrivateScope.Privatize();
|
||||
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
|
||||
CodeGenDistribute);
|
||||
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
||||
};
|
||||
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
|
||||
emitPostUpdateForReductionClause(*this, S,
|
||||
[](CodeGenFunction &) { return nullptr; });
|
||||
}
|
||||
|
||||
static void
|
||||
emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
|
||||
const OMPTargetTeamsGenericLoopDirective &S,
|
||||
PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
// Emit 'teams loop' as if its constituent constructs are 'distribute,
|
||||
// 'parallel, and 'for'.
|
||||
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
||||
S.getDistInc());
|
||||
};
|
||||
|
||||
// Emit teams region as a standalone region.
|
||||
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
||||
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
||||
(void)PrivateScope.Privatize();
|
||||
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
|
||||
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
|
||||
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
||||
};
|
||||
|
||||
emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
|
||||
CodeGenTeams);
|
||||
emitPostUpdateForReductionClause(CGF, S,
|
||||
[](CodeGenFunction &) { return nullptr; });
|
||||
}
|
||||
|
||||
/// Emit combined directive 'target teams loop' as if its constituent
|
||||
/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
|
||||
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
|
||||
const OMPTargetTeamsGenericLoopDirective &S) {
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
emitTargetTeamsGenericLoopRegion(CGF, S, Action);
|
||||
};
|
||||
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
||||
}
|
||||
|
||||
/// Emit the device function for a 'target teams loop' directive.
///
/// \param CGM        Module whose OpenMP runtime performs the outlining.
/// \param ParentName Name forwarded to emitTargetOutlinedFunction.
/// \param S          The 'target teams loop' directive.
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Body of the 'target teams loop' region, emitted as a standalone region.
  auto &&TargetRegionGen = [&S](CodeGenFunction &RegionCGF,
                                PrePostActionTy &RegionAction) {
    emitTargetTeamsGenericLoopRegion(RegionCGF, S, RegionAction);
  };

  // Both results are produced by emitTargetOutlinedFunction.
  llvm::Function *OutlinedFn;
  llvm::Constant *OutlinedFnAddr;

  // Emit the target region as a standalone region, registered as an offload
  // entry.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, OutlinedFn, OutlinedFnAddr, /*IsOffloadEntry=*/true,
      TargetRegionGen);
  assert(OutlinedFn && OutlinedFnAddr &&
         "Target device function emission failed for 'target teams loop'.");
}
|
||||
|
||||
/// Emit the region of a 'target parallel loop' directive as a 'parallel for'.
///
/// \param CGF    Function into which the region is emitted.
/// \param S      The 'target parallel loop' directive.
/// \param Action Pre/post action entered before anything else is emitted.
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);

  // Body of the parallel region: a worksharing loop emitted inside a cancel
  // stack entry that is created with cancellation turned off.
  auto &&ParallelForGen = [&S](CodeGenFunction &LoopCGF,
                               PrePostActionTy &LoopAction) {
    LoopAction.Enter(LoopCGF);
    CodeGenFunction::OMPCancelStackRAII CancelScope(
        LoopCGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    auto *EnsureUB = S.getEnsureUpperBound();
    LoopCGF.EmitOMPWorksharingLoop(S, EnsureUB, emitForLoopBounds,
                                   emitDispatchForLoopBounds);
  };

  emitCommonOMPParallelDirective(CGF, S, OMPD_for, ParallelForGen,
                                 emitEmptyBoundParameters);
}
|
||||
|
||||
/// Emit the device function for a 'target parallel loop' directive.
///
/// \param CGM        Module whose OpenMP runtime performs the outlining.
/// \param ParentName Name forwarded to emitTargetOutlinedFunction.
/// \param S          The 'target parallel loop' directive.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Body of the 'target parallel loop' region, emitted as a standalone region.
  auto &&TargetRegionGen = [&S](CodeGenFunction &RegionCGF,
                                PrePostActionTy &RegionAction) {
    emitTargetParallelGenericLoopRegion(RegionCGF, S, RegionAction);
  };

  // Both results are produced by emitTargetOutlinedFunction.
  llvm::Function *OutlinedFn;
  llvm::Constant *OutlinedFnAddr;

  // Emit the target region as a standalone region, registered as an offload
  // entry.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, OutlinedFn, OutlinedFnAddr, /*IsOffloadEntry=*/true,
      TargetRegionGen);
  assert(OutlinedFn && OutlinedFnAddr &&
         "Target device function emission failed.");
}
|
||||
|
||||
/// Emit combined directive 'target parallel loop' as if its constituent
|
||||
/// constructs are 'target', 'parallel', and 'for'.
|
||||
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
|
||||
const OMPTargetParallelGenericLoopDirective &S) {
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
emitTargetParallelGenericLoopRegion(CGF, S, Action);
|
||||
};
|
||||
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
|
||||
const OMPExecutableDirective &D) {
|
||||
if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
|
||||
|
||||
@ -3580,6 +3580,12 @@ public:
|
||||
void EmitOMPTargetTeamsDistributeSimdDirective(
|
||||
const OMPTargetTeamsDistributeSimdDirective &S);
|
||||
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
|
||||
void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S);
|
||||
void EmitOMPTargetParallelGenericLoopDirective(
|
||||
const OMPTargetParallelGenericLoopDirective &S);
|
||||
void EmitOMPTargetTeamsGenericLoopDirective(
|
||||
const OMPTargetTeamsGenericLoopDirective &S);
|
||||
void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S);
|
||||
void EmitOMPInteropDirective(const OMPInteropDirective &S);
|
||||
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);
|
||||
|
||||
@ -3620,6 +3626,16 @@ public:
|
||||
CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
|
||||
|
||||
/// Emit device code for the target teams loop directive.
|
||||
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(
|
||||
CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetTeamsGenericLoopDirective &S);
|
||||
|
||||
/// Emit device code for the target parallel loop directive.
|
||||
static void EmitOMPTargetParallelGenericLoopDeviceFunction(
|
||||
CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetParallelGenericLoopDirective &S);
|
||||
|
||||
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
|
||||
CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetTeamsDistributeParallelForDirective &S);
|
||||
|
||||
@ -4199,7 +4199,6 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||
case OMPD_target_parallel:
|
||||
case OMPD_target_parallel_for:
|
||||
case OMPD_target_parallel_for_simd:
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_parallel_loop:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_distribute_simd: {
|
||||
@ -4448,6 +4447,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||
Params);
|
||||
break;
|
||||
}
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_teams_distribute_parallel_for:
|
||||
case OMPD_target_teams_distribute_parallel_for_simd: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
|
||||
@ -4506,22 +4506,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
||||
break;
|
||||
}
|
||||
|
||||
case OMPD_teams_loop: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
|
||||
QualType KmpInt32PtrTy =
|
||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||
|
||||
Sema::CapturedParamNameType ParamsTeams[] = {
|
||||
std::make_pair(".global_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(".bound_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
};
|
||||
// Start a captured region for 'teams'.
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
ParamsTeams, /*OpenMPCaptureLevel=*/0);
|
||||
break;
|
||||
}
|
||||
|
||||
case OMPD_teams_loop:
|
||||
case OMPD_teams_distribute_parallel_for:
|
||||
case OMPD_teams_distribute_parallel_for_simd: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst();
|
||||
@ -15381,6 +15366,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_target_teams_distribute_parallel_for:
|
||||
// If this clause applies to the nested 'parallel' region, capture within
|
||||
// the 'teams' region, otherwise do not capture.
|
||||
@ -15473,7 +15459,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
|
||||
case OMPD_target:
|
||||
case OMPD_target_teams:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_loop:
|
||||
case OMPD_distribute_parallel_for:
|
||||
case OMPD_task:
|
||||
case OMPD_taskloop:
|
||||
|
||||
117
clang/test/OpenMP/generic_loop_codegen.cpp
Normal file
117
clang/test/OpenMP/generic_loop_codegen.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]"
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
||||
|
||||
// Check same results after serialization round-trip
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
void foo(int t) { // Test input: one collapsed 'loop' nest bound to the encountering thread; the IR and IR-PCH checks below match ordinary for.cond/for.body/for.inc blocks.
|
||||
|
||||
int i, j, z; // The outer 'i' is shadowed by the loop's own 'int i'; 'j' is the lastprivate variable and 'z' the reduction variable (declared without an initializer).
|
||||
#pragma omp loop collapse(2) reduction(+:z) lastprivate(j) bind(thread)
|
||||
for (int i = 0; i<t; ++i) // outer loop of the collapse(2) nest
|
||||
for (j = 0; j<t; ++j) // inner loop; iterates the function-scope 'j'
|
||||
z += i+j; // accumulates into the reduction variable
|
||||
}
|
||||
#endif
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3fooi
|
||||
// IR-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[Z:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[I1:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
|
||||
// IR-NEXT: store i32 0, ptr [[I1]], align 4
|
||||
// IR-NEXT: br label [[FOR_COND:%.*]]
|
||||
// IR: for.cond:
|
||||
// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
||||
// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
|
||||
// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
|
||||
// IR: for.body:
|
||||
// IR-NEXT: store i32 0, ptr [[J]], align 4
|
||||
// IR-NEXT: br label [[FOR_COND2:%.*]]
|
||||
// IR: for.cond2:
|
||||
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
||||
// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
|
||||
// IR-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
|
||||
// IR: for.body4:
|
||||
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
|
||||
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
|
||||
// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
|
||||
// IR-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
|
||||
// IR-NEXT: br label [[FOR_INC:%.*]]
|
||||
// IR: for.inc:
|
||||
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
|
||||
// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
|
||||
// IR-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
// IR: for.end:
|
||||
// IR-NEXT: br label [[FOR_INC6:%.*]]
|
||||
// IR: for.inc6:
|
||||
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// IR-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
|
||||
// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
// IR: for.end8:
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3fooi
|
||||
// IR-PCH-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[Z:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[I1:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[I1]], align 4
|
||||
// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
|
||||
// IR-PCH: for.cond:
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
||||
// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
|
||||
// IR-PCH: for.body:
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: br label [[FOR_COND2:%.*]]
|
||||
// IR-PCH: for.cond2:
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
||||
// IR-PCH-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
|
||||
// IR-PCH: for.body4:
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
|
||||
// IR-PCH-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
|
||||
// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
|
||||
// IR-PCH: for.inc:
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
|
||||
// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
// IR-PCH: for.end:
|
||||
// IR-PCH-NEXT: br label [[FOR_INC6:%.*]]
|
||||
// IR-PCH: for.inc6:
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
|
||||
// IR-PCH-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// IR-PCH-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
|
||||
// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
// IR-PCH: for.end8:
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
3746
clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
Normal file
3746
clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,525 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
int a; // Global assigned by the loop body in main(); listed in map(tofrom:a) on the target directive there.
|
||||
|
||||
int foo(int *a); // Prototype only (no definition visible in this test); the loop body calls it with &i, &a and &argc.
|
||||
|
||||
int main(int argc, char **argv) { // Holds the single 'target teams loop' region this test checks. NOTE(review): the CHECK lines match a kernel named ..._main_l24; presumably the l24 suffix encodes the directive's source line, so keep any added comments trailing - confirm before inserting lines above the pragma.
|
||||
#pragma omp target teams loop map(tofrom:a) if(target:argc)
|
||||
for (int i= 0; i < argc; ++i) // Upper bound is argc, a runtime value captured into the region.
|
||||
a = foo(&i) + foo(&a) + foo(&argc); // Passes the addresses of the loop variable, the mapped global and the captured argc to foo, then stores the sum to a.
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
|
||||
// CHECK1-SAME: (i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
|
||||
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
|
||||
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
||||
// CHECK1: user_code.entry:
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
|
||||
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
|
||||
// CHECK1-NEXT: ret void
|
||||
// CHECK1: worker.exit:
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
|
||||
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
||||
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
||||
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
||||
// CHECK1: omp.precond.then:
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
|
||||
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
|
||||
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
|
||||
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
|
||||
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
|
||||
// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
|
||||
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
|
||||
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
|
||||
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
|
||||
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
|
||||
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
|
||||
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
|
||||
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
|
||||
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
|
||||
// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
|
||||
// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
|
||||
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
|
||||
// CHECK1: cond.true10:
|
||||
// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END12:%.*]]
|
||||
// CHECK1: cond.false11:
|
||||
// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END12]]
|
||||
// CHECK1: cond.end12:
|
||||
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
|
||||
// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK1: omp.loop.exit:
|
||||
// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
|
||||
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
|
||||
// CHECK1: omp.precond.end:
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
|
||||
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
||||
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
||||
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
||||
// CHECK1: omp.precond.then:
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
|
||||
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
|
||||
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
|
||||
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]]
|
||||
// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
|
||||
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
|
||||
// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
|
||||
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
|
||||
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK1: omp.body.continue:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
||||
// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK1: omp.loop.exit:
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
|
||||
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
|
||||
// CHECK1: omp.precond.end:
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
|
||||
// CHECK2-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK2-NEXT: entry:
|
||||
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
|
||||
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
|
||||
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
||||
// CHECK2: user_code.entry:
|
||||
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
||||
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
|
||||
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
|
||||
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
|
||||
// CHECK2-NEXT: ret void
|
||||
// CHECK2: worker.exit:
|
||||
// CHECK2-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
|
||||
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK2-NEXT: entry:
|
||||
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
|
||||
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
|
||||
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
||||
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
||||
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
|
||||
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
||||
// CHECK2: omp.precond.then:
|
||||
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
|
||||
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
|
||||
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
|
||||
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
|
||||
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK2: cond.true:
|
||||
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK2: cond.false:
|
||||
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: br label [[COND_END]]
|
||||
// CHECK2: cond.end:
|
||||
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
|
||||
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK2: omp.inner.for.cond:
|
||||
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
|
||||
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
|
||||
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK2: omp.inner.for.body:
|
||||
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
|
||||
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
|
||||
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
|
||||
// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
|
||||
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
|
||||
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
|
||||
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
|
||||
// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
|
||||
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
|
||||
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
|
||||
// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
|
||||
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
|
||||
// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
|
||||
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK2: omp.inner.for.inc:
|
||||
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
|
||||
// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
|
||||
// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
|
||||
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
|
||||
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
|
||||
// CHECK2: cond.true10:
|
||||
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: br label [[COND_END12:%.*]]
|
||||
// CHECK2: cond.false11:
|
||||
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: br label [[COND_END12]]
|
||||
// CHECK2: cond.end12:
|
||||
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
|
||||
// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK2: omp.inner.for.end:
|
||||
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK2: omp.loop.exit:
|
||||
// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
|
||||
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
|
||||
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
||||
// CHECK2: omp.precond.end:
|
||||
// CHECK2-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
|
||||
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK2-NEXT: entry:
|
||||
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
|
||||
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
|
||||
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
|
||||
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
|
||||
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
|
||||
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
|
||||
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
|
||||
// CHECK2: omp.precond.then:
|
||||
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
|
||||
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK2: omp.inner.for.cond:
|
||||
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
|
||||
// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK2: omp.inner.for.body:
|
||||
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
|
||||
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
|
||||
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]]
|
||||
// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
|
||||
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]]
|
||||
// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
|
||||
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]]
|
||||
// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4
|
||||
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK2: omp.body.continue:
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK2: omp.inner.for.inc:
|
||||
// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
|
||||
// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK2: omp.inner.for.end:
|
||||
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK2: omp.loop.exit:
|
||||
// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
|
||||
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
|
||||
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
|
||||
// CHECK2: omp.precond.end:
|
||||
// CHECK2-NEXT: ret void
|
||||
//
|
||||
224
clang/test/OpenMP/parallel_generic_loop_codegen.cpp
Normal file
224
clang/test/OpenMP/parallel_generic_loop_codegen.cpp
Normal file
@ -0,0 +1,224 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
||||
|
||||
// Check same results after serialization round-trip
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
#define N 64
|
||||
// Test input for the combined `parallel loop` directive.
// The directive below requests N threads, collapses the two N-iteration loops
// into a single iteration space, and gives each thread a private copy of `x`
// placed via the `allocate` clause. The autogenerated IR / IR-PCH assertions
// that follow this function match the IR produced for it (fork call,
// __kmpc_push_num_threads, __kmpc_alloc/__kmpc_free for `x`, static-init loop
// with upper bound 4095), so any change to the code here requires
// regenerating those assertions.
int foo() {
|
||||
int x = 0;
|
||||
int result[N] = {0};
|
||||
|
||||
#pragma omp parallel loop num_threads(N) allocate(x) private(x) collapse(2)
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j = 0; j < N; j++)
|
||||
// Reads the private `x`; the value stored is irrelevant to the test, only
// the emitted IR is checked.
result[i] = i + j + x;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// IR-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[X:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16
|
||||
// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
|
||||
// IR-NEXT: store i32 0, ptr [[X]], align 4
|
||||
// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false)
|
||||
// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
|
||||
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]])
|
||||
// IR-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
|
||||
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null)
|
||||
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095
|
||||
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR: cond.true:
|
||||
// IR-NEXT: br label [[COND_END:%.*]]
|
||||
// IR: cond.false:
|
||||
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: br label [[COND_END]]
|
||||
// IR: cond.end:
|
||||
// IR-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
|
||||
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR: omp.inner.for.cond:
|
||||
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
|
||||
// IR-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
||||
// IR: omp.inner.for.cond.cleanup:
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR: omp.inner.for.body:
|
||||
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64
|
||||
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64
|
||||
// IR-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64
|
||||
// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
|
||||
// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
|
||||
// IR-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
|
||||
// IR-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
|
||||
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]]
|
||||
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
|
||||
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
|
||||
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR: omp.body.continue:
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR: omp.inner.for.inc:
|
||||
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1
|
||||
// IR-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR: omp.inner.for.end:
|
||||
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR: omp.loop.exit:
|
||||
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
|
||||
// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null)
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4
|
||||
// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false)
|
||||
// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
|
||||
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]])
|
||||
// IR-PCH-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
|
||||
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null)
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095
|
||||
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR-PCH: cond.true:
|
||||
// IR-PCH-NEXT: br label [[COND_END:%.*]]
|
||||
// IR-PCH: cond.false:
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: br label [[COND_END]]
|
||||
// IR-PCH: cond.end:
|
||||
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
|
||||
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond:
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond.cleanup:
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR-PCH: omp.inner.for.body:
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64
|
||||
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64
|
||||
// IR-PCH-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64
|
||||
// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
|
||||
// IR-PCH-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
|
||||
// IR-PCH-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
|
||||
// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-PCH-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]]
|
||||
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
|
||||
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR-PCH: omp.body.continue:
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR-PCH: omp.inner.for.inc:
|
||||
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1
|
||||
// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR-PCH: omp.inner.for.end:
|
||||
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR-PCH: omp.loop.exit:
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
|
||||
// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null)
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
8390
clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp
Normal file
8390
clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp
Normal file
File diff suppressed because it is too large
Load Diff
952
clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp
Normal file
952
clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp
Normal file
@ -0,0 +1,952 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-X86
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK-X86
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s
|
||||
|
||||
// Test target parallel for codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
|
||||
int nested(int a){ // Test input for `target parallel loop`: once directly, once inside a lambda's `parallel` region. NOTE(review): the CHECK lines match offload entry names ending in `_l42`, which appears to be the line number of the first pragma in the test file - keep comments trailing so no lines are added.
|
||||
#pragma omp target parallel loop
|
||||
for (int i = 0; i < 10; ++i) // 10 iterations; the outlined-function checks store upper bound 9
|
||||
++a;
|
||||
|
||||
auto F = [&](){ // captures `a` by reference
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp target parallel loop
|
||||
for (int i = 0; i < 10; ++i) // same loop, now nested inside the host `parallel` region
|
||||
++a;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
F();
|
||||
|
||||
return a;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Check metadata is properly generated:
|
||||
|
||||
#endif
|
||||
// CHECK-LABEL: define dso_local noundef signext i32 @_Z6nestedi
|
||||
// CHECK-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
|
||||
// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32 2, ptr [[TMP7]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP13]], align 8
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8
|
||||
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK-NEXT: store i64 0, ptr [[TMP15]], align 8
|
||||
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8
|
||||
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4
|
||||
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
|
||||
// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4
|
||||
// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
|
||||
// CHECK-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK: omp_offload.failed:
|
||||
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i64 [[TMP1]]) #[[ATTR3:[0-9]+]]
|
||||
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK: omp_offload.cont:
|
||||
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 8
|
||||
// CHECK-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
|
||||
// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: ret i32 [[TMP23]]
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
|
||||
// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
|
||||
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK: cond.true:
|
||||
// CHECK-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK: cond.false:
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: br label [[COND_END]]
|
||||
// CHECK: cond.end:
|
||||
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK: omp.inner.for.cond:
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK: omp.inner.for.body:
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK: omp.body.continue:
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK: omp.inner.for.inc:
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK: omp.inner.for.end:
|
||||
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK: omp.loop.exit:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
|
||||
// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
|
||||
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK: cond.true:
|
||||
// CHECK-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK: cond.false:
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: br label [[COND_END]]
|
||||
// CHECK: cond.end:
|
||||
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK: omp.inner.for.cond:
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK: omp.inner.for.body:
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK: omp.body.continue:
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK: omp.inner.for.inc:
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK: omp.inner.for.end:
|
||||
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK: omp.loop.exit:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define internal void @.omp_offloading.requires_reg
|
||||
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
|
||||
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
|
||||
// CHECK-X86-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
|
||||
// CHECK-X86-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
|
||||
// CHECK-X86-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: store ptr null, ptr [[TMP4]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: store i32 2, ptr [[TMP7]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK-X86-NEXT: store i32 1, ptr [[TMP8]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK-X86-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK-X86-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK-X86-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK-X86-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK-X86-NEXT: store ptr null, ptr [[TMP13]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK-X86-NEXT: store ptr null, ptr [[TMP14]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK-X86-NEXT: store i64 0, ptr [[TMP15]], align 8
|
||||
// CHECK-X86-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK-X86-NEXT: store i64 0, ptr [[TMP16]], align 8
|
||||
// CHECK-X86-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK-X86-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK-X86-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK-X86-NEXT: store i32 0, ptr [[TMP19]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK-X86-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
|
||||
// CHECK-X86-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK-X86: omp_offload.failed:
|
||||
// CHECK-X86-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
|
||||
// CHECK-X86-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK-X86: omp_offload.cont:
|
||||
// CHECK-X86-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// CHECK-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 4
|
||||
// CHECK-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
|
||||
// CHECK-X86-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: ret i32 [[TMP23]]
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
|
||||
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]])
|
||||
// CHECK-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
|
||||
// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK-X86: cond.true:
|
||||
// CHECK-X86-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK-X86: cond.false:
|
||||
// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: br label [[COND_END]]
|
||||
// CHECK-X86: cond.end:
|
||||
// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK-X86: omp.inner.for.cond:
|
||||
// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK-X86: omp.inner.for.body:
|
||||
// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK-X86: omp.body.continue:
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK-X86: omp.inner.for.inc:
|
||||
// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK-X86: omp.inner.for.end:
|
||||
// CHECK-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK-X86: omp.loop.exit:
|
||||
// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// CHECK-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
|
||||
// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
|
||||
// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]])
|
||||
// CHECK-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
|
||||
// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK-X86: cond.true:
|
||||
// CHECK-X86-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK-X86: cond.false:
|
||||
// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: br label [[COND_END]]
|
||||
// CHECK-X86: cond.end:
|
||||
// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK-X86: omp.inner.for.cond:
|
||||
// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK-X86: omp.inner.for.body:
|
||||
// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK-X86: omp.body.continue:
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK-X86: omp.inner.for.inc:
|
||||
// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK-X86: omp.inner.for.end:
|
||||
// CHECK-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK-X86: omp.loop.exit:
|
||||
// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// CHECK-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-X86-LABEL: define internal void @.omp_offloading.requires_reg
|
||||
// CHECK-X86-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
// CHECK-X86-NEXT: entry:
|
||||
// CHECK-X86-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// SIMD-ONLY0-LABEL: define dso_local noundef signext i32 @_Z6nestedi
|
||||
// SIMD-ONLY0-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// SIMD-ONLY0-NEXT: entry:
|
||||
// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY0-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
|
||||
// SIMD-ONLY0-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]]
|
||||
// SIMD-ONLY0: for.cond:
|
||||
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
||||
// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
||||
// SIMD-ONLY0: for.body:
|
||||
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
||||
// SIMD-ONLY0-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]]
|
||||
// SIMD-ONLY0: for.inc:
|
||||
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
|
||||
// SIMD-ONLY0-NEXT: store i32 [[INC1]], ptr [[I]], align 4
|
||||
// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
// SIMD-ONLY0: for.end:
|
||||
// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// SIMD-ONLY0-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8
|
||||
// SIMD-ONLY0-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
|
||||
// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-NEXT: ret i32 [[TMP4]]
|
||||
//
|
||||
//
|
||||
// SIMD-ONLY0-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
|
||||
// SIMD-ONLY0-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// SIMD-ONLY0-X86-NEXT: entry:
|
||||
// SIMD-ONLY0-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY0-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY0-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND:%.*]]
|
||||
// SIMD-ONLY0-X86: for.cond:
|
||||
// SIMD-ONLY0-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
||||
// SIMD-ONLY0-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
||||
// SIMD-ONLY0-X86: for.body:
|
||||
// SIMD-ONLY0-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
||||
// SIMD-ONLY0-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: br label [[FOR_INC:%.*]]
|
||||
// SIMD-ONLY0-X86: for.inc:
|
||||
// SIMD-ONLY0-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
|
||||
// SIMD-ONLY0-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
// SIMD-ONLY0-X86: for.end:
|
||||
// SIMD-ONLY0-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// SIMD-ONLY0-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
|
||||
// SIMD-ONLY0-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY0-X86-NEXT: ret i32 [[TMP4]]
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
|
||||
// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// TCHECK-TARGET-NEXT: entry:
|
||||
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
|
||||
// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]])
|
||||
// TCHECK-TARGET-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
|
||||
// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// TCHECK-TARGET-NEXT: entry:
|
||||
// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// TCHECK-TARGET: cond.true:
|
||||
// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]]
|
||||
// TCHECK-TARGET: cond.false:
|
||||
// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[COND_END]]
|
||||
// TCHECK-TARGET: cond.end:
|
||||
// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.cond:
|
||||
// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.body:
|
||||
// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// TCHECK-TARGET: omp.body.continue:
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.inc:
|
||||
// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// TCHECK-TARGET: omp.inner.for.end:
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// TCHECK-TARGET: omp.loop.exit:
|
||||
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// TCHECK-TARGET-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
|
||||
// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] {
|
||||
// TCHECK-TARGET-NEXT: entry:
|
||||
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
|
||||
// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]])
|
||||
// TCHECK-TARGET-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
|
||||
// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// TCHECK-TARGET-NEXT: entry:
|
||||
// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// TCHECK-TARGET: cond.true:
|
||||
// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]]
|
||||
// TCHECK-TARGET: cond.false:
|
||||
// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[COND_END]]
|
||||
// TCHECK-TARGET: cond.end:
|
||||
// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.cond:
|
||||
// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.body:
|
||||
// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// TCHECK-TARGET: omp.body.continue:
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// TCHECK-TARGET: omp.inner.for.inc:
|
||||
// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// TCHECK-TARGET: omp.inner.for.end:
|
||||
// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// TCHECK-TARGET: omp.loop.exit:
|
||||
// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// TCHECK-TARGET-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42
|
||||
// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// TCHECK-TARGET-X86-NEXT: entry:
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]])
|
||||
// TCHECK-TARGET-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined
|
||||
// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// TCHECK-TARGET-X86-NEXT: entry:
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// TCHECK-TARGET-X86: cond.true:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]]
|
||||
// TCHECK-TARGET-X86: cond.false:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[COND_END]]
|
||||
// TCHECK-TARGET-X86: cond.end:
|
||||
// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.cond:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.body:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.body.continue:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.inc:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.end:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.loop.exit:
|
||||
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// TCHECK-TARGET-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49
|
||||
// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] {
|
||||
// TCHECK-TARGET-X86-NEXT: entry:
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]])
|
||||
// TCHECK-TARGET-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined
|
||||
// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
|
||||
// TCHECK-TARGET-X86-NEXT: entry:
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// TCHECK-TARGET-X86: cond.true:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]]
|
||||
// TCHECK-TARGET-X86: cond.false:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[COND_END]]
|
||||
// TCHECK-TARGET-X86: cond.end:
|
||||
// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.cond:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.body:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.body.continue:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.inc:
|
||||
// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
|
||||
// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// TCHECK-TARGET-X86: omp.inner.for.end:
|
||||
// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// TCHECK-TARGET-X86: omp.loop.exit:
|
||||
// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
|
||||
// TCHECK-TARGET-X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// SIMD-ONLY1-TARGET-LABEL: define dso_local noundef signext i32 @_Z6nestedi
|
||||
// SIMD-ONLY1-TARGET-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// SIMD-ONLY1-TARGET-NEXT: entry:
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
|
||||
// SIMD-ONLY1-TARGET-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND:%.*]]
|
||||
// SIMD-ONLY1-TARGET: for.cond:
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
||||
// SIMD-ONLY1-TARGET-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
||||
// SIMD-ONLY1-TARGET: for.body:
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
||||
// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_INC:%.*]]
|
||||
// SIMD-ONLY1-TARGET: for.inc:
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
|
||||
// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC1]], ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
// SIMD-ONLY1-TARGET: for.end:
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// SIMD-ONLY1-TARGET-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8
|
||||
// SIMD-ONLY1-TARGET-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]])
|
||||
// SIMD-ONLY1-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-NEXT: ret i32 [[TMP4]]
|
||||
//
|
||||
//
|
||||
// SIMD-ONLY1-TARGET-X86-LABEL: define dso_local noundef i32 @_Z6nestedi
|
||||
// SIMD-ONLY1-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: entry:
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 0, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND:%.*]]
|
||||
// SIMD-ONLY1-TARGET-X86: for.cond:
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
|
||||
// SIMD-ONLY1-TARGET-X86: for.body:
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_INC:%.*]]
|
||||
// SIMD-ONLY1-TARGET-X86: for.inc:
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
// SIMD-ONLY1-TARGET-X86: for.end:
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]])
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
|
||||
// SIMD-ONLY1-TARGET-X86-NEXT: ret i32 [[TMP4]]
|
||||
//
|
||||
921
clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
Normal file
921
clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
Normal file
@ -0,0 +1,921 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s --check-prefix=CHECK1
|
||||
// expected-no-diagnostics
|
||||
|
||||
int main() { // Test input: three 'target parallel loop' regions over the same 10-iteration loop, each with different firstprivate/map clauses.
|
||||
/* int(*b)[a]; */
|
||||
/* int *(**c)[a]; */
|
||||
bool bb;
|
||||
int a;
|
||||
int b[10][10];
|
||||
int c[10][10][10]; // NOTE(review): CHECK1 labels below contain "_main_l13"; presumably the line number of the first pragma, so avoid adding lines above it - TODO confirm.
|
||||
#pragma omp target parallel loop firstprivate(a, b) map(tofrom \
|
||||
: c) map(tofrom \
|
||||
: bb) if (a)
|
||||
for (int i = 0; i < 10; ++i) { // Region 1: a and b are firstprivate; c and bb are mapped tofrom; an 'if (a)' clause is present.
|
||||
int &f = c[1][1][1]; // f, g and h bind references to an element of c, to a, and to an element of b.
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
bb |= b[0][a]; // bb is both read and written in this region.
|
||||
}
|
||||
#pragma omp target parallel loop firstprivate(a) map(tofrom \
|
||||
: c, b) map(to \
|
||||
: bb)
|
||||
for (int i = 0; i < 10; ++i) { // Region 2: only a is firstprivate; c and b are mapped tofrom; bb is mapped 'to'.
|
||||
int &f = c[1][1][1];
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
d = bb; // bb is only read in this region.
|
||||
}
|
||||
#pragma omp target parallel loop map(tofrom \
|
||||
: a, c, b) map(from \
|
||||
: bb)
|
||||
for (int i = 0; i < 10; ++i) { // Region 3: no firstprivate clause; a, c and b are mapped tofrom; bb is mapped 'from'.
|
||||
int &f = c[1][1][1];
|
||||
int &g = a;
|
||||
int &h = b[1][1];
|
||||
int d = 15;
|
||||
a = 5;
|
||||
b[0][a] = 10;
|
||||
c[0][0][a] = 11;
|
||||
b[0][a] = c[0][0][a];
|
||||
bb = b[0][a]; // bb is only written in this region.
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__
|
||||
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]]
|
||||
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8
|
||||
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG41:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
|
||||
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
|
||||
// CHECK1: user_code.entry:
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]])
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
|
||||
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
|
||||
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
|
||||
// CHECK1: worker.exit:
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]]
|
||||
// CHECK1: omp.dispatch.cond:
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]]
|
||||
// CHECK1: omp.dispatch.body:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG79]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG82]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG84]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]]
|
||||
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]]
|
||||
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]]
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]]
|
||||
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]]
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]]
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]]
|
||||
// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]]
|
||||
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]]
|
||||
// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]]
|
||||
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]]
|
||||
// CHECK1: omp.body.continue:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]]
|
||||
// CHECK1: omp.dispatch.inc:
|
||||
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]]
|
||||
// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]]
|
||||
// CHECK1: omp.dispatch.end:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
|
||||
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG122]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG122]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13
|
||||
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
|
||||
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
|
||||
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG132]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__
|
||||
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
|
||||
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
|
||||
// CHECK1: user_code.entry:
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
|
||||
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
|
||||
// CHECK1: worker.exit:
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
|
||||
// CHECK1: omp.dispatch.cond:
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG168]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]]
|
||||
// CHECK1: omp.dispatch.body:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG174]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG177]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG180]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG182]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG185]]
|
||||
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]]
|
||||
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG190:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]]
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]]
|
||||
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]]
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]]
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG193]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]]
|
||||
// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]]
|
||||
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]]
|
||||
// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG199:![0-9]+]]
|
||||
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]]
|
||||
// CHECK1: omp.body.continue:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]]
|
||||
// CHECK1: omp.dispatch.inc:
|
||||
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]]
|
||||
// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
|
||||
// CHECK1: omp.dispatch.end:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
|
||||
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG213]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
|
||||
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]]
|
||||
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG222]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__
|
||||
// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
|
||||
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
|
||||
// CHECK1: user_code.entry:
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
|
||||
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
|
||||
// CHECK1: worker.exit:
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
|
||||
// CHECK1: omp.dispatch.cond:
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG258]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]]
|
||||
// CHECK1: omp.dispatch.body:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG264]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG267]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG270]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]]
|
||||
// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG272]]
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]]
|
||||
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG275]]
|
||||
// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]]
|
||||
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]]
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]]
|
||||
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG283:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]]
|
||||
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]]
|
||||
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG283]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]]
|
||||
// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG289:![0-9]+]]
|
||||
// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]]
|
||||
// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]]
|
||||
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG288]]
|
||||
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]]
|
||||
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]]
|
||||
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG290]]
|
||||
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]]
|
||||
// CHECK1: omp.body.continue:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG263]]
|
||||
// CHECK1: omp.dispatch.inc:
|
||||
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]]
|
||||
// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
|
||||
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
|
||||
// CHECK1: omp.dispatch.end:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
|
||||
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG306]]
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
|
||||
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]]
|
||||
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
|
||||
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
|
||||
// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
|
||||
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]]
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]]
|
||||
// CHECK1-NEXT: ret void, !dbg [[DBG315]]
|
||||
//
|
||||
387
clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
Normal file
387
clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
Normal file
@ -0,0 +1,387 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
|
||||
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
||||
|
||||
// Check same results after serialization round-trip
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
typedef void **omp_allocator_handle_t;
|
||||
extern const omp_allocator_handle_t omp_null_allocator;
|
||||
extern const omp_allocator_handle_t omp_default_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_const_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
|
||||
extern const omp_allocator_handle_t omp_thread_mem_alloc;
|
||||
|
||||
extern int omp_get_thread_num(void);
|
||||
|
||||
#define N 64
|
||||
|
||||
int main() {
|
||||
int x = 0;
|
||||
int device_result[N] = {0};
|
||||
|
||||
#pragma omp target parallel loop num_threads(N) uses_allocators(omp_pteam_mem_alloc) allocate(omp_pteam_mem_alloc: x) private(x) map(from: device_result)
|
||||
// Test body for the combined 'target parallel loop' directive. The RUN lines
// at the top of this file compile it for the host, for an amdgcn device, and
// through a PCH round-trip, and FileCheck compares the emitted IR.
//
// Clauses exercised by the pragma above:
//   num_threads(N)       - requests N (64) threads for the parallel region.
//   uses_allocators(...) /
//   allocate(...: x)     - the private copy of 'x' is allocated through
//                          omp_pteam_mem_alloc.
//   private(x)           - each thread works on its own 'x'.
//   map(from: device_result) - the result array is mapped back from the device.
//
// NOTE: the autogenerated CHECK lines match a kernel named '..._main_l37',
// which appears to encode the source line of the pragma above. Keep any
// commentary below the pragma (or regenerate the assertions with
// utils/update_cc_test_checks.py) so that line number does not move.
for (int i = 0; i < N; i++) {
|
||||
// Record the executing thread's number in the private 'x' ...
x = omp_get_thread_num();
|
||||
// ... and store iteration index plus thread number in the mapped array.
device_result[i] = i + x;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
|
||||
// IR-GPU-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// IR-GPU-NEXT: entry:
|
||||
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
|
||||
// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
|
||||
// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
|
||||
// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
||||
// IR-GPU: user_code.entry:
|
||||
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
|
||||
// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
|
||||
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
|
||||
// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
|
||||
// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8
|
||||
// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
|
||||
// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
|
||||
// IR-GPU-NEXT: ret void
|
||||
// IR-GPU: worker.exit:
|
||||
// IR-GPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined
|
||||
// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// IR-GPU-NEXT: entry:
|
||||
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
|
||||
// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
|
||||
// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
|
||||
// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
|
||||
// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: store i32 63, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
|
||||
// IR-GPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP2]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
|
||||
// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
|
||||
// IR-GPU: omp.dispatch.cond:
|
||||
// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 63
|
||||
// IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR-GPU: cond.true:
|
||||
// IR-GPU-NEXT: br label [[COND_END:%.*]]
|
||||
// IR-GPU: cond.false:
|
||||
// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: br label [[COND_END]]
|
||||
// IR-GPU: cond.end:
|
||||
// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
|
||||
// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
|
||||
// IR-GPU-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
|
||||
// IR-GPU: omp.dispatch.body:
|
||||
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR-GPU: omp.inner.for.cond:
|
||||
// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
|
||||
// IR-GPU-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR-GPU: omp.inner.for.body:
|
||||
// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
|
||||
// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() #[[ATTR5:[0-9]+]]
|
||||
// IR-GPU-NEXT: store i32 [[CALL]], ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4
|
||||
// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4
|
||||
// IR-GPU-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
|
||||
// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-GPU-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
|
||||
// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR-GPU: omp.body.continue:
|
||||
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR-GPU: omp.inner.for.inc:
|
||||
// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
|
||||
// IR-GPU-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR-GPU: omp.inner.for.end:
|
||||
// IR-GPU-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
|
||||
// IR-GPU: omp.dispatch.inc:
|
||||
// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
|
||||
// IR-GPU-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
|
||||
// IR-GPU-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
|
||||
// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND]]
|
||||
// IR-GPU: omp.dispatch.end:
|
||||
// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP2]])
|
||||
// IR-GPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@main
|
||||
// IR-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[X:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16
|
||||
// IR-NEXT: store i32 0, ptr [[X]], align 4
|
||||
// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false)
|
||||
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
|
||||
// IR-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
|
||||
// IR-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
|
||||
// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
|
||||
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]])
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
|
||||
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]])
|
||||
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63
|
||||
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR: cond.true:
|
||||
// IR-NEXT: br label [[COND_END:%.*]]
|
||||
// IR: cond.false:
|
||||
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: br label [[COND_END]]
|
||||
// IR: cond.end:
|
||||
// IR-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
|
||||
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR: omp.inner.for.cond:
|
||||
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
||||
// IR-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
||||
// IR: omp.inner.for.cond.cleanup:
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR: omp.inner.for.body:
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv()
|
||||
// IR-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
|
||||
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
|
||||
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
|
||||
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR: omp.body.continue:
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR: omp.inner.for.inc:
|
||||
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1
|
||||
// IR-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR: omp.inner.for.end:
|
||||
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR: omp.loop.exit:
|
||||
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
|
||||
// IR-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]])
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@main
|
||||
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4
|
||||
// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false)
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
|
||||
// IR-PCH-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
|
||||
// IR-PCH-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
|
||||
// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64)
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]])
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
|
||||
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]])
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63
|
||||
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR-PCH: cond.true:
|
||||
// IR-PCH-NEXT: br label [[COND_END:%.*]]
|
||||
// IR-PCH: cond.false:
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: br label [[COND_END]]
|
||||
// IR-PCH: cond.end:
|
||||
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
|
||||
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond:
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond.cleanup:
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR-PCH: omp.inner.for.body:
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv()
|
||||
// IR-PCH-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
|
||||
// IR-PCH-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
|
||||
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
|
||||
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
|
||||
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR-PCH: omp.body.continue:
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR-PCH: omp.inner.for.inc:
|
||||
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1
|
||||
// IR-PCH-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR-PCH: omp.inner.for.end:
|
||||
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR-PCH: omp.loop.exit:
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
|
||||
// IR-PCH-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8
|
||||
// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]])
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
@ -0,0 +1,210 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Check that target registration is registered as a Ctor.
|
||||
|
||||
|
||||
template<typename tx, typename ty>
|
||||
struct TT{ // Two-member aggregate template; foo() below declares a TT<long long, char> local.
|
||||
tx X; // member of the first template parameter type
|
||||
ty Y; // member of the second template parameter type
|
||||
};
|
||||
|
||||
int global; // File-scope int used by the clauses, loop bounds and loop bodies of the target regions in foo().
|
||||
extern int global; // Redeclaration of the same variable, placed after its definition.
|
||||
|
||||
int foo(int n) { // Test input: three 'target parallel loop' regions with device/depend/nowait/if/firstprivate clauses; n sizes the VLAs bn and cn.
|
||||
int a = 0; // appears in device(), depend() and if() clauses below; never assigned again in the visible code
|
||||
short aa = 0; // not referenced again in this function
|
||||
float b[10]; // listed in depend(out: ...) of the first region
|
||||
float bn[n]; // variable-length array; listed in depend(inout: ...) of the second region
|
||||
double c[5][10]; // not referenced again in this function
|
||||
double cn[5][n]; // variable-length inner dimension; cn[4] is listed in depend(out: ...) of the first region
|
||||
TT<long long, char> d; // not referenced again in this function
|
||||
static long *plocal; // NOTE(review): never assigned in the visible code but dereferenced in the second region; the RUN lines only emit IR/PCH/bitcode, so presumably this is never executed - confirm
|
||||
|
||||
#pragma omp target parallel loop device(global + a) depend(in: global) depend(out: a, b, cn[4])
|
||||
for (int i = 0; i < 10; ++i) { // first region: constant bound of 10, empty body
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#pragma omp target parallel loop device(global + a) nowait depend(inout: global, a, bn) if(target:a)
|
||||
for (int i = 0; i < *plocal; ++i) { // second region: the loop bound is read through the static pointer
|
||||
static int local1; // static local declared inside the region body
|
||||
*plocal = global; // stores global through the static pointer
|
||||
local1 = global; // stores global into the region-local static
|
||||
}
|
||||
|
||||
#pragma omp target parallel loop if(0) firstprivate(global) depend(out:global)
|
||||
for (int i = 0; i < global; ++i) { // third region: bound and body both use global, which the directive lists as firstprivate
|
||||
global += 1;
|
||||
}
|
||||
|
||||
return a; // a is initialized to 0 and not assigned anywhere in the visible code
|
||||
}
|
||||
|
||||
// Check that the offloading functions are emitted and that the arguments are
|
||||
// correct and loaded correctly for the target regions in foo().
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||
|
||||
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||
|
||||
// Create stack storage and store argument in there.
|
||||
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
|
||||
// CHECK-64: load i32, i32* [[AA_CADDR]], align
|
||||
// CHECK-32: load i32, i32* [[AA_ADDR]], align
|
||||
|
||||
// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32*
|
||||
// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]],
|
||||
// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]],
|
||||
|
||||
|
||||
#endif
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
|
||||
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
||||
// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
|
||||
// TCHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// TCHECK-NEXT: entry:
|
||||
// TCHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined)
|
||||
// TCHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
// SIMD-ONLY0: {{.*}}
|
||||
// SIMD-ONLY1: {{.*}}
|
||||
@ -0,0 +1,211 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
enum omp_allocator_handle_t { // Allocator handle type declared locally by the test; the visible source includes no header.
|
||||
omp_null_allocator = 0, // named in the uses_allocators clause in foo()
|
||||
omp_default_mem_alloc = 1,
|
||||
omp_large_cap_mem_alloc = 2,
|
||||
omp_const_mem_alloc = 3,
|
||||
omp_high_bw_mem_alloc = 4,
|
||||
omp_low_lat_mem_alloc = 5,
|
||||
omp_cgroup_mem_alloc = 6,
|
||||
omp_pteam_mem_alloc = 7,
|
||||
omp_thread_mem_alloc = 8, // named in the uses_allocators clause in foo()
|
||||
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ // presumably widens the enum to pointer size; the CHECK lines allocate my_allocator as i64 - confirm
|
||||
};
|
||||
|
||||
typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, // Trait key constants (values 1..8); the type of omp_alloctrait_t::key below.
|
||||
omp_atk_alignment = 2,
|
||||
omp_atk_access = 3,
|
||||
omp_atk_pool_size = 4,
|
||||
omp_atk_fallback = 5,
|
||||
omp_atk_fb_data = 6,
|
||||
omp_atk_pinned = 7,
|
||||
omp_atk_partition = 8
|
||||
} omp_alloctrait_key_t;
|
||||
typedef enum omp_alloctrait_value_t { // Trait value constants (values 0..18); none is referenced elsewhere in the visible test source.
|
||||
omp_atv_false = 0,
|
||||
omp_atv_true = 1,
|
||||
omp_atv_default = 2,
|
||||
omp_atv_contended = 3,
|
||||
omp_atv_uncontended = 4,
|
||||
omp_atv_sequential = 5,
|
||||
omp_atv_private = 6,
|
||||
omp_atv_all = 7,
|
||||
omp_atv_thread = 8,
|
||||
omp_atv_pteam = 9,
|
||||
omp_atv_cgroup = 10,
|
||||
omp_atv_default_mem_fb = 11,
|
||||
omp_atv_null_fb = 12,
|
||||
omp_atv_abort_fb = 13,
|
||||
omp_atv_allocator_fb = 14,
|
||||
omp_atv_environment = 15,
|
||||
omp_atv_nearest = 16,
|
||||
omp_atv_blocked = 17,
|
||||
omp_atv_interleaved = 18
|
||||
} omp_alloctrait_value_t;
|
||||
|
||||
typedef struct omp_alloctrait_t { // Key/value trait record; foo() declares an array of 10 (160 dereferenceable bytes in the CHECK lines).
|
||||
omp_alloctrait_key_t key; // which trait this record sets
|
||||
__UINTPTR_TYPE__ value; // value stored as the compiler-predefined pointer-sized unsigned integer type
|
||||
} omp_alloctrait_t;
|
||||
|
||||
// Just map the traits variable as a firstprivate variable.
|
||||
|
||||
void foo() { // Test input: one 'target parallel loop' region exercising the uses_allocators clause.
|
||||
omp_alloctrait_t traits[10]; // left uninitialized; the CHECK lines pass it to the kernel and to __kmpc_init_allocator with count 10
|
||||
omp_allocator_handle_t my_allocator; // per the CHECK lines, set from __kmpc_init_allocator in the kernel and released via __kmpc_destroy_allocator
|
||||
|
||||
#pragma omp target parallel loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
|
||||
for (int i = 0; i < 10; ++i) // constant bound of 10
|
||||
; // empty loop body
|
||||
}
|
||||
|
||||
|
||||
// Destroy allocator upon exit from the region.
|
||||
|
||||
#endif
|
||||
// CHECK-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
|
||||
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK-NEXT: store i64 0, ptr [[TMP13]], align 8
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8
|
||||
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP15]], align 4
|
||||
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
|
||||
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
|
||||
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
|
||||
// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK: omp_offload.failed:
|
||||
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
|
||||
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK: omp_offload.cont:
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
|
||||
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]])
|
||||
// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64
|
||||
// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined)
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr
|
||||
// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined
|
||||
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK: cond.true:
|
||||
// CHECK-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK: cond.false:
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: br label [[COND_END]]
|
||||
// CHECK: cond.end:
|
||||
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK: omp.inner.for.cond:
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK: omp.inner.for.body:
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK: omp.body.continue:
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK: omp.inner.for.inc:
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
|
||||
// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK: omp.inner.for.end:
|
||||
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK: omp.loop.exit:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
||||
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
2664
clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
Normal file
2664
clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2688
clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
Normal file
2688
clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1915
clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
Normal file
1915
clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
3219
clang/test/OpenMP/target_teams_generic_loop_depend_codegen.cpp
Normal file
3219
clang/test/OpenMP/target_teams_generic_loop_depend_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1576
clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
Normal file
1576
clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
207
clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp
Normal file
207
clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp
Normal file
@ -0,0 +1,207 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
|
||||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
void gtid_test() { // Codegen input: a single combined 'target teams loop' region carrying order(concurrent).
|
||||
#pragma omp target teams loop order(concurrent)
|
||||
for(int i = 0 ; i < 100; i++) {} // Empty body, 100 iterations: the checks below expect a trip count of 100 and a constant upper bound of 99.
|
||||
} // NOTE(review): the checked kernel names end in _l16, which looks like the pragma's source line -- avoid adding lines above it; confirm before reflowing.
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
// CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv
|
||||
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
|
||||
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
|
||||
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK1: omp_offload.failed:
|
||||
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16() #[[ATTR2:[0-9]+]]
|
||||
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK1: omp_offload.cont:
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16
|
||||
// CHECK1-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined)
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
|
||||
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK1: omp.loop.exit:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined
|
||||
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
|
||||
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
|
||||
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK1: cond.true:
|
||||
// CHECK1-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK1: cond.false:
|
||||
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: br label [[COND_END]]
|
||||
// CHECK1: cond.end:
|
||||
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
|
||||
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK1: omp.inner.for.cond:
|
||||
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
|
||||
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
|
||||
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
||||
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK1: omp.inner.for.body:
|
||||
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
|
||||
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
||||
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
|
||||
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK1: omp.body.continue:
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK1: omp.inner.for.inc:
|
||||
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
|
||||
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
|
||||
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
|
||||
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
// CHECK1: omp.inner.for.end:
|
||||
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK1: omp.loop.exit:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
||||
// CHECK1-SAME: () #[[ATTR3:[0-9]+]] section ".text.startup" {
|
||||
// CHECK1-NEXT: entry:
|
||||
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
3124
clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
Normal file
3124
clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1510
clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp
Normal file
1510
clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,482 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
enum omp_allocator_handle_t { // Allocator handle type declared directly in this test; foo() names two of these enumerators in its uses_allocators clause.
|
||||
omp_null_allocator = 0,
|
||||
omp_default_mem_alloc = 1,
|
||||
omp_large_cap_mem_alloc = 2,
|
||||
omp_const_mem_alloc = 3,
|
||||
omp_high_bw_mem_alloc = 4,
|
||||
omp_low_lat_mem_alloc = 5,
|
||||
omp_cgroup_mem_alloc = 6,
|
||||
omp_pteam_mem_alloc = 7,
|
||||
omp_thread_mem_alloc = 8,
|
||||
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ // Largest uintptr value; presumably present to make the enum pointer-sized (the checks allocate my_allocator as i64) -- TODO confirm.
|
||||
};
|
||||
|
||||
typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, // Trait keys numbered 1-8; this is the type of omp_alloctrait_t::key.
|
||||
omp_atk_alignment = 2,
|
||||
omp_atk_access = 3,
|
||||
omp_atk_pool_size = 4,
|
||||
omp_atk_fallback = 5,
|
||||
omp_atk_fb_data = 6,
|
||||
omp_atk_pinned = 7,
|
||||
omp_atk_partition = 8
|
||||
} omp_alloctrait_key_t;
|
||||
typedef enum omp_alloctrait_value_t { // Named trait values 0-18. Note that omp_alloctrait_t::value is declared as __UINTPTR_TYPE__, not as this enum.
|
||||
omp_atv_false = 0,
|
||||
omp_atv_true = 1,
|
||||
omp_atv_default = 2,
|
||||
omp_atv_contended = 3,
|
||||
omp_atv_uncontended = 4,
|
||||
omp_atv_sequential = 5,
|
||||
omp_atv_private = 6,
|
||||
omp_atv_all = 7,
|
||||
omp_atv_thread = 8,
|
||||
omp_atv_pteam = 9,
|
||||
omp_atv_cgroup = 10,
|
||||
omp_atv_default_mem_fb = 11,
|
||||
omp_atv_null_fb = 12,
|
||||
omp_atv_abort_fb = 13,
|
||||
omp_atv_allocator_fb = 14,
|
||||
omp_atv_environment = 15,
|
||||
omp_atv_nearest = 16,
|
||||
omp_atv_blocked = 17,
|
||||
omp_atv_interleaved = 18
|
||||
} omp_alloctrait_value_t;
|
||||
|
||||
typedef struct omp_alloctrait_t { // One allocator trait: a key paired with its value; foo() passes an array of ten of these to my_allocator(...).
|
||||
omp_alloctrait_key_t key; // Which trait this entry sets.
|
||||
__UINTPTR_TYPE__ value; // Stored as the compiler's uintptr type rather than as omp_alloctrait_value_t.
|
||||
} omp_alloctrait_t;
|
||||
|
||||
// Just map the traits variable as a firstprivate variable.
|
||||
|
||||
void foo() { // Codegen input: a combined 'target teams loop' region with a uses_allocators clause.
|
||||
omp_alloctrait_t traits[10]; // Trait array handed to my_allocator(traits) in the clause; declared without an initializer.
|
||||
omp_allocator_handle_t my_allocator; // User allocator named in the clause; the visible checks fill it from __kmpc_init_allocator (10 traits) and release it with __kmpc_destroy_allocator.
|
||||
|
||||
#pragma omp target teams loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
|
||||
for (int i = 0; i < 10; ++i) // Ten iterations with an empty body: the checks expect a trip count of 10 and a constant upper bound of 9.
|
||||
;
|
||||
} // NOTE(review): the checked kernel names end in _l73, which looks like the pragma's source line -- avoid adding lines above it; confirm before reflowing.
|
||||
|
||||
|
||||
// Destroy allocator upon exit from the region.
|
||||
|
||||
#endif
|
||||
// CHECK-64-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-64-NEXT: entry:
|
||||
// CHECK-64-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
|
||||
// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
|
||||
// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
|
||||
// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
||||
// CHECK-64-NEXT: store ptr null, ptr [[TMP2]], align 8
|
||||
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK-64-NEXT: store i32 2, ptr [[TMP5]], align 4
|
||||
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK-64-NEXT: store i32 1, ptr [[TMP6]], align 4
|
||||
// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK-64-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
|
||||
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
|
||||
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK-64-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
|
||||
// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
|
||||
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK-64-NEXT: store ptr null, ptr [[TMP11]], align 8
|
||||
// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK-64-NEXT: store ptr null, ptr [[TMP12]], align 8
|
||||
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK-64-NEXT: store i64 10, ptr [[TMP13]], align 8
|
||||
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK-64-NEXT: store i64 0, ptr [[TMP14]], align 8
|
||||
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4
|
||||
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
|
||||
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK-64-NEXT: store i32 0, ptr [[TMP17]], align 4
|
||||
// CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
|
||||
// CHECK-64-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK-64: omp_offload.failed:
|
||||
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
|
||||
// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK-64: omp_offload.cont:
|
||||
// CHECK-64-NEXT: ret void
|
||||
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73
|
||||
// CHECK-64-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK-64-NEXT: entry:
|
||||
// CHECK-64-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
||||
// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
|
||||
// CHECK-64-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP2]])
|
||||
// CHECK-64-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP3]] to i64
|
||||
// CHECK-64-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.)
|
||||
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-64-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to ptr
|
||||
// CHECK-64-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
|
||||
// CHECK-64-NEXT: ret void
|
||||
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined.
|
||||
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-64-NEXT: entry:
|
||||
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK-64: cond.true:
|
||||
// CHECK-64-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK-64: cond.false:
|
||||
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: br label [[COND_END]]
|
||||
// CHECK-64: cond.end:
|
||||
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK-64: omp.inner.for.cond:
|
||||
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK-64: omp.inner.for.body:
|
||||
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
|
||||
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
|
||||
// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]])
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK-64: omp.inner.for.inc:
|
||||
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK-64: omp.inner.for.end:
|
||||
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK-64: omp.loop.exit:
|
||||
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
|
||||
// CHECK-64-NEXT: ret void
|
||||
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1
|
||||
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-64-NEXT: entry:
|
||||
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
|
||||
// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
|
||||
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK-64: cond.true:
|
||||
// CHECK-64-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK-64: cond.false:
|
||||
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: br label [[COND_END]]
|
||||
// CHECK-64: cond.end:
|
||||
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
|
||||
// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK-64: omp.inner.for.cond:
|
||||
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
||||
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK-64: omp.inner.for.body:
|
||||
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
||||
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK-64: omp.body.continue:
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK-64: omp.inner.for.inc:
|
||||
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
|
||||
// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK-64: omp.inner.for.end:
|
||||
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK-64: omp.loop.exit:
|
||||
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
|
||||
// CHECK-64-NEXT: ret void
|
||||
// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
||||
// CHECK-64-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
// CHECK-64-NEXT: entry:
|
||||
// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-64-NEXT: ret void
|
||||
// CHECK-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8
|
||||
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
|
||||
// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
|
||||
// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
|
||||
// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
|
||||
// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
|
||||
// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
|
||||
// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
|
||||
// CHECK-NEXT: store i64 10, ptr [[TMP13]], align 8
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
|
||||
// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8
|
||||
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
|
||||
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4
|
||||
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
|
||||
// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
|
||||
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
|
||||
// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
|
||||
// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]])
|
||||
// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
|
||||
// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
||||
// CHECK: omp_offload.failed:
|
||||
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]]
|
||||
// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
|
||||
// CHECK: omp_offload.cont:
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
|
||||
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
|
||||
// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]])
|
||||
// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64
|
||||
// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined)
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8
|
||||
// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr
|
||||
// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined
|
||||
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK: cond.true:
|
||||
// CHECK-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK: cond.false:
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: br label [[COND_END]]
|
||||
// CHECK: cond.end:
|
||||
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK: omp.inner.for.cond:
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
|
||||
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK: omp.inner.for.body:
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
|
||||
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK: omp.inner.for.inc:
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
|
||||
// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK: omp.inner.for.end:
|
||||
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK: omp.loop.exit:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined
|
||||
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// CHECK-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
|
||||
// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// CHECK: cond.true:
|
||||
// CHECK-NEXT: br label [[COND_END:%.*]]
|
||||
// CHECK: cond.false:
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: br label [[COND_END]]
|
||||
// CHECK: cond.end:
|
||||
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// CHECK: omp.inner.for.cond:
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
|
||||
// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// CHECK: omp.inner.for.body:
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// CHECK: omp.body.continue:
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// CHECK: omp.inner.for.inc:
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
|
||||
// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
|
||||
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// CHECK: omp.inner.for.end:
|
||||
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK: omp.loop.exit:
|
||||
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
||||
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
3545
clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
Normal file
3545
clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
Normal file
File diff suppressed because it is too large
Load Diff
770
clang/test/OpenMP/teams_generic_loop_codegen.cpp
Normal file
770
clang/test/OpenMP/teams_generic_loop_codegen.cpp
Normal file
@ -0,0 +1,770 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
||||
|
||||
// Check same results after serialization round-trip
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
// Test input for code generation of the combined 'teams loop' directive.
// Two nested 10-iteration loops sit under one directive with collapse(2); the
// autogenerated assertions later in this file expect the collapsed iteration
// space to run from 0 to 99, a __kmpc_fork_teams call on an outlined function,
// and a nested __kmpc_fork_call issued from inside that outlined function.
// Returns 0 unconditionally.
int foo() {
|
||||
int i;
|
||||
int j;
|
||||
// NOTE(review): 'sum' is never initialized. The RUN lines only pass
// -emit-llvm or -emit-pch, so this function appears to be compiled but never
// executed -- confirm before reusing it as a runtime test.
int sum[10][10];
|
||||

||||
// Clauses exercised by the directive below and what the assertions expect
// from them: reduction(+:sum) gives zero-initialized private copies of the
// array that are combined through __kmpc_reduce_nowait; lastprivate(j) stores
// the value 10 to the private 'j' and copies it back to the original variable.
// bind(parallel) and order(concurrent) are present as well.
#pragma omp teams loop reduction(+:sum) collapse(2) bind(parallel) \
|
||||
order(concurrent) lastprivate(j)
|
||||
for(i=0; i<10; i++)
|
||||
for(j=0; j<10; j++)
|
||||
sum[i][j] += i;
|
||||

||||
return 0;
|
||||
}
|
||||
#endif
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// IR-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]])
|
||||
// IR-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
|
||||
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J4:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
||||
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
|
||||
// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0
|
||||
// IR-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
||||
// IR: omp.arrayinit.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
||||
// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
||||
// IR: omp.arrayinit.done:
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
|
||||
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR: cond.true:
|
||||
// IR-NEXT: br label [[COND_END:%.*]]
|
||||
// IR: cond.false:
|
||||
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: br label [[COND_END]]
|
||||
// IR: cond.end:
|
||||
// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
|
||||
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR: omp.inner.for.cond:
|
||||
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
|
||||
// IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR: omp.inner.for.body:
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
|
||||
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
|
||||
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR: omp.inner.for.inc:
|
||||
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR: omp.inner.for.end:
|
||||
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR: omp.loop.exit:
|
||||
// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
|
||||
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
|
||||
// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
|
||||
// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
|
||||
// IR: .omp.lastprivate.then:
|
||||
// IR-NEXT: store i32 10, ptr [[J3]], align 4
|
||||
// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
|
||||
// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
|
||||
// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
|
||||
// IR: .omp.lastprivate.done:
|
||||
// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
||||
// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
|
||||
// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
||||
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
||||
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
||||
// IR-NEXT: ]
|
||||
// IR: .omp.reduction.case1:
|
||||
// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR: omp.arraycpy.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
|
||||
// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
|
||||
// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR: omp.arraycpy.done10:
|
||||
// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR: .omp.reduction.case2:
|
||||
// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
|
||||
// IR: omp.arraycpy.body12:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
|
||||
// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
|
||||
// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
|
||||
// IR: omp.arraycpy.done18:
|
||||
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR: .omp.reduction.default:
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
|
||||
// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[J5:%.*]] = alloca i32, align 4
|
||||
// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
||||
// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
|
||||
// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
|
||||
// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
|
||||
// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
|
||||
// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
|
||||
// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
||||
// IR: omp.arrayinit.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
||||
// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
||||
// IR: omp.arrayinit.done:
|
||||
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
|
||||
// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
|
||||
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR: cond.true:
|
||||
// IR-NEXT: br label [[COND_END:%.*]]
|
||||
// IR: cond.false:
|
||||
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: br label [[COND_END]]
|
||||
// IR: cond.end:
|
||||
// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
|
||||
// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR: omp.inner.for.cond:
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
|
||||
// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
|
||||
// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR: omp.inner.for.body:
|
||||
// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
|
||||
// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
|
||||
// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
|
||||
// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
|
||||
// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
|
||||
// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
|
||||
// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
|
||||
// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
|
||||
// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
|
||||
// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
|
||||
// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR: omp.body.continue:
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR: omp.inner.for.inc:
|
||||
// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
|
||||
// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
// IR: omp.inner.for.end:
|
||||
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR: omp.loop.exit:
|
||||
// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
|
||||
// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
|
||||
// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
||||
// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
|
||||
// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
|
||||
// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
||||
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
||||
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
||||
// IR-NEXT: ]
|
||||
// IR: .omp.reduction.case1:
|
||||
// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR: omp.arraycpy.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
|
||||
// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
|
||||
// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR: omp.arraycpy.done19:
|
||||
// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR: .omp.reduction.case2:
|
||||
// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
|
||||
// IR: omp.arraycpy.body21:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
|
||||
// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
|
||||
// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
|
||||
// IR: omp.arraycpy.done27:
|
||||
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR: .omp.reduction.default:
|
||||
// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
|
||||
// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
|
||||
// IR: .omp.lastprivate.then:
|
||||
// IR-NEXT: store i32 10, ptr [[J3]], align 4
|
||||
// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
|
||||
// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
|
||||
// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
|
||||
// IR: .omp.lastprivate.done:
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
|
||||
// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
||||
// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
||||
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
||||
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
||||
// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
|
||||
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
||||
// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
|
||||
// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
||||
// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR: omp.arraycpy.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR: omp.arraycpy.done2:
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
|
||||
// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
||||
// IR-NEXT: entry:
|
||||
// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
||||
// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
||||
// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
||||
// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
||||
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
||||
// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
|
||||
// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
||||
// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
|
||||
// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
||||
// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR: omp.arraycpy.body:
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
||||
// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
||||
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR: omp.arraycpy.done2:
|
||||
// IR-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov
|
||||
// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]])
|
||||
// IR-PCH-NEXT: ret i32 0
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined
|
||||
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
||||
// IR-PCH: omp.arrayinit.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
||||
// IR-PCH: omp.arrayinit.done:
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
|
||||
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR-PCH: cond.true:
|
||||
// IR-PCH-NEXT: br label [[COND_END:%.*]]
|
||||
// IR-PCH: cond.false:
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: br label [[COND_END]]
|
||||
// IR-PCH: cond.end:
|
||||
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
|
||||
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond:
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR-PCH: omp.inner.for.body:
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
|
||||
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
|
||||
// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR-PCH: omp.inner.for.inc:
|
||||
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
|
||||
// IR-PCH: omp.inner.for.end:
|
||||
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR-PCH: omp.loop.exit:
|
||||
// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
|
||||
// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
|
||||
// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
|
||||
// IR-PCH: .omp.lastprivate.then:
|
||||
// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
|
||||
// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
|
||||
// IR-PCH: .omp.lastprivate.done:
|
||||
// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
|
||||
// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
||||
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
||||
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
||||
// IR-PCH-NEXT: ]
|
||||
// IR-PCH: .omp.reduction.case1:
|
||||
// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
|
||||
// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR-PCH: omp.arraycpy.done10:
|
||||
// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR-PCH: .omp.reduction.case2:
|
||||
// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body12:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
|
||||
// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
|
||||
// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
|
||||
// IR-PCH: omp.arraycpy.done18:
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR-PCH: .omp.reduction.default:
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
|
||||
// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
|
||||
// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
|
||||
// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
|
||||
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
|
||||
// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
|
||||
// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
|
||||
// IR-PCH: omp.arrayinit.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
|
||||
// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
|
||||
// IR-PCH: omp.arrayinit.done:
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
|
||||
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
|
||||
// IR-PCH: cond.true:
|
||||
// IR-PCH-NEXT: br label [[COND_END:%.*]]
|
||||
// IR-PCH: cond.false:
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: br label [[COND_END]]
|
||||
// IR-PCH: cond.end:
|
||||
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
|
||||
// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
|
||||
// IR-PCH: omp.inner.for.cond:
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
|
||||
// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
|
||||
// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
|
||||
// IR-PCH: omp.inner.for.body:
|
||||
// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
|
||||
// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
|
||||
// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
|
||||
// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
|
||||
// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
|
||||
// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
|
||||
// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
|
||||
// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
|
||||
// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
|
||||
// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
|
||||
// IR-PCH: omp.body.continue:
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
|
||||
// IR-PCH: omp.inner.for.inc:
|
||||
// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
|
||||
// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
|
||||
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
// IR-PCH: omp.inner.for.end:
|
||||
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// IR-PCH: omp.loop.exit:
|
||||
// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
|
||||
// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
|
||||
// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
|
||||
// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
|
||||
// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
|
||||
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
|
||||
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
|
||||
// IR-PCH-NEXT: ]
|
||||
// IR-PCH: .omp.reduction.case1:
|
||||
// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
|
||||
// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR-PCH: omp.arraycpy.done19:
|
||||
// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR-PCH: .omp.reduction.case2:
|
||||
// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body21:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
|
||||
// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
|
||||
// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
|
||||
// IR-PCH: omp.arraycpy.done27:
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
|
||||
// IR-PCH: .omp.reduction.default:
|
||||
// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
|
||||
// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
|
||||
// IR-PCH: .omp.lastprivate.then:
|
||||
// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
|
||||
// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
|
||||
// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
|
||||
// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
|
||||
// IR-PCH: .omp.lastprivate.done:
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
|
||||
// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR-PCH: omp.arraycpy.done2:
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
|
||||
// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
|
||||
// IR-PCH-NEXT: entry:
|
||||
// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
|
||||
// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
|
||||
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
|
||||
// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
|
||||
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
|
||||
// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
|
||||
// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
|
||||
// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
|
||||
// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
|
||||
// IR-PCH: omp.arraycpy.body:
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
|
||||
// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
|
||||
// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
|
||||
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
|
||||
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
|
||||
// IR-PCH: omp.arraycpy.done2:
|
||||
// IR-PCH-NEXT: ret void
|
||||
//
|
||||
1894
clang/test/OpenMP/teams_generic_loop_collapse_codgen.cpp
Normal file
1894
clang/test/OpenMP/teams_generic_loop_collapse_codgen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1918
clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
Normal file
1918
clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1524
clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
Normal file
1524
clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user