
The IR Verifier requires that every call instruction to an inlinable function (among other things, its implementation must be visible in the translation unit) also has !dbg metadata attached to it. When parallelizing, Polly emits calls to OpenMP runtime functions out of thin air, or at least not directly derived from a bounded list of previous instructions. While we could search for instructions in the SCoP that have some debug info attached to them, there is no guarantee that we would find any. Our solution is to generate a new DILocation that points to line 0 to represent optimized code. The OpenMP function implementation is usually not available in the user's translation unit, but can become visible in an LTO build. For the bug to appear, libomp must also be built with debug symbols. IMHO, the IR verifier rule is too strict. Runtime functions can also be inserted by other optimization passes, such as LoopIdiomRecognize. When inserting a call to e.g. memset, it uses the DebugLoc from a StoreInst from the unoptimized code. That StoreInst is not required to have !dbg metadata attached either. Fixes #56692
568 lines
22 KiB
C++
568 lines
22 KiB
C++
//===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file contains functions to create parallel loops as LLVM-IR.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "polly/CodeGen/LoopGeneratorsKMP.h"
|
|
#include "llvm/IR/Dominators.h"
|
|
#include "llvm/IR/Module.h"
|
|
|
|
using namespace llvm;
|
|
using namespace polly;
|
|
|
|
/// Emit a call to the OpenMP runtime function __kmpc_fork_call.
///
/// The runtime function is declared in the module if it is not present yet.
/// The call receives the source location dummy, the constant 4, the
/// subfunction cast to a pointer to the microtask type, and then \p LB,
/// \p UB, \p Stride and \p SubFnParam as variadic arguments.
///
/// @param SubFn      The subfunction handed to the runtime as the task.
/// @param SubFnParam The value passed last in the variadic argument list.
/// @param LB         The lower bound forwarded to the subfunction.
/// @param UB         The upper bound forwarded to the subfunction.
/// @param Stride     The stride forwarded to the subfunction.
void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
                                                      Value *SubFnParam,
                                                      Value *LB, Value *UB,
                                                      Value *Stride) {
  const char *const RuntimeFnName = "__kmpc_fork_call";
  Function *ForkCallFn = M->getFunction(RuntimeFnName);

  // Reuse a type named "kmpc_micro" if the context has one; otherwise build
  // the microtask signature by hand:
  // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
  Type *MicroTaskTy = StructType::getTypeByName(M->getContext(), "kmpc_micro");
  if (MicroTaskTy == nullptr) {
    Type *TidPtrTys[] = {Builder.getInt32Ty()->getPointerTo(),
                         Builder.getInt32Ty()->getPointerTo()};
    MicroTaskTy = FunctionType::get(Builder.getVoidTy(), TidPtrTys, true);
  }

  // Declare the runtime function on first use.
  if (ForkCallFn == nullptr) {
    StructType *LocTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    Type *ForkParamTys[] = {LocTy->getPointerTo(), Builder.getInt32Ty(),
                            MicroTaskTy->getPointerTo()};
    FunctionType *ForkCallTy =
        FunctionType::get(Builder.getVoidTy(), ForkParamTys, true);
    ForkCallFn = Function::Create(ForkCallTy, Function::ExternalLinkage,
                                  RuntimeFnName, M);
  }

  Value *MicroTask = Builder.CreatePointerBitCastOrAddrSpaceCast(
      SubFn, MicroTaskTy->getPointerTo());

  Value *ForkArgs[] = {SourceLocationInfo,
                       Builder.getInt32(4) /* Number of arguments (w/o Task) */,
                       MicroTask,
                       LB,
                       UB,
                       Stride,
                       SubFnParam};

  // Generated calls carry the synthetic debug location.
  CallInst *ForkCall = Builder.CreateCall(ForkCallFn, ForkArgs);
  ForkCall->setDebugLoc(DLGenerated);
}
|
|
|
|
/// Emit the runtime calls that start the parallel execution of \p SubFn.
///
/// When PollyNumThreads is positive, a thread-count request is emitted first
/// (global thread number query followed by a push-num-threads call). The
/// spawn call is emitted in every case.
void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
                                                       Value *SubFnParam,
                                                       Value *LB, Value *UB,
                                                       Value *Stride) {
  // Only a strictly positive thread count is communicated to the runtime.
  const bool HasThreadCountRequest = PollyNumThreads > 0;
  if (HasThreadCountRequest) {
    Value *Tid = createCallGlobalThreadNum();
    Value *RequestedThreads = Builder.getInt32(PollyNumThreads);
    createCallPushNumThreads(Tid, RequestedThreads);
  }

  // Hand the subfunction and its loop description over to the runtime.
  createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
}
|
|
|
|
/// Create the (still empty) subfunction for the parallel loop.
///
/// The subfunction returns void, has internal linkage, is named after \p F
/// with the suffix "_polly_subfn", and takes six parameters: two i32
/// pointers, three values of LongType and one i8 pointer. Each parameter is
/// given a "polly.kmpc.*" name.
///
/// @param F The function whose name is used as prefix for the subfunction.
/// @return The newly created subfunction.
Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
  Type *TidPtrTy = Builder.getInt32Ty()->getPointerTo();
  std::vector<Type *> ParamTys = {TidPtrTy, TidPtrTy, LongType,
                                  LongType, LongType, Builder.getInt8PtrTy()};

  FunctionType *SubFnTy =
      FunctionType::get(Builder.getVoidTy(), ParamTys, false);
  Function *SubFn = Function::Create(SubFnTy, Function::InternalLinkage,
                                     F->getName() + "_polly_subfn", M);

  // Parameter names, in declaration order.
  const char *const ParamNames[] = {
      "polly.kmpc.global_tid", "polly.kmpc.bound_tid", "polly.kmpc.lb",
      "polly.kmpc.ub",         "polly.kmpc.inc",       "polly.kmpc.shared"};

  Function::arg_iterator Param = SubFn->arg_begin();
  for (const char *ParamName : ParamNames) {
    Param->setName(ParamName);
    ++Param;
  }

  return SubFn;
}
|
|
|
|
// Create a subfunction of the following (preliminary) structure:
//
//        PrevBB
//           |
//           v
//        HeaderBB
//       /   |    _____
//      /    v   v     |
//     /   PreHeaderBB |
//    |      |         |
//    |      v         |
//    |  CheckNextBB   |
//     \     |   \_____/
//      \    |
//       v   v
//       ExitBB
//
// HeaderBB will hold allocations, loading of variables and kmp-init calls.
// CheckNextBB will check for more work (dynamic / static chunked) or will be
// empty (static non chunked).
// If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
// PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
// Just like CheckNextBB: PreHeaderBB is (preliminarily) empty in the static
// non chunked scheduling case. ExitBB marks the end of the parallel execution.
// The possibly empty BasicBlocks will automatically be removed.
|
|
std::tuple<Value *, Function *>
|
|
ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
|
|
AllocaInst *StructData,
|
|
SetVector<Value *> Data, ValueMapT &Map) {
|
|
Function *SubFn = createSubFnDefinition();
|
|
LLVMContext &Context = SubFn->getContext();
|
|
|
|
// Store the previous basic block.
|
|
BasicBlock *PrevBB = Builder.GetInsertBlock();
|
|
|
|
// Create basic blocks.
|
|
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
|
|
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
|
|
BasicBlock *CheckNextBB =
|
|
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
|
|
BasicBlock *PreHeaderBB =
|
|
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
|
|
|
|
DT.addNewBlock(HeaderBB, PrevBB);
|
|
DT.addNewBlock(ExitBB, HeaderBB);
|
|
DT.addNewBlock(CheckNextBB, HeaderBB);
|
|
DT.addNewBlock(PreHeaderBB, HeaderBB);
|
|
|
|
// Fill up basic block HeaderBB.
|
|
Builder.SetInsertPoint(HeaderBB);
|
|
Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
|
|
Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
|
|
Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
|
|
"polly.par.lastIterPtr");
|
|
Value *StridePtr =
|
|
Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
|
|
|
|
// Get iterator for retrieving the previously defined parameters.
|
|
Function::arg_iterator AI = SubFn->arg_begin();
|
|
// First argument holds "global thread ID".
|
|
Value *IDPtr = &*AI;
|
|
// Skip "bound thread ID" since it is not used (but had to be defined).
|
|
std::advance(AI, 2);
|
|
// Move iterator to: LB, UB, Stride, Shared variable struct.
|
|
Value *LB = &*AI;
|
|
std::advance(AI, 1);
|
|
Value *UB = &*AI;
|
|
std::advance(AI, 1);
|
|
Value *Stride = &*AI;
|
|
std::advance(AI, 1);
|
|
Value *Shared = &*AI;
|
|
|
|
Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(),
|
|
"polly.par.userContext");
|
|
|
|
extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
|
|
Map);
|
|
|
|
const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
|
|
Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment,
|
|
"polly.par.global_tid");
|
|
|
|
Builder.CreateAlignedStore(LB, LBPtr, Alignment);
|
|
Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
|
|
Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
|
|
|
|
// Subtract one as the upper bound provided by openmp is a < comparison
|
|
// whereas the codegenForSequential function creates a <= comparison.
|
|
Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
|
|
"polly.indvar.UBAdjusted");
|
|
|
|
Value *ChunkSize =
|
|
ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
|
|
|
|
OMPGeneralSchedulingType Scheduling =
|
|
getSchedType(PollyChunkSize, PollyScheduling);
|
|
|
|
switch (Scheduling) {
|
|
case OMPGeneralSchedulingType::Dynamic:
|
|
case OMPGeneralSchedulingType::Guided:
|
|
case OMPGeneralSchedulingType::Runtime:
|
|
// "DYNAMIC" scheduling types are handled below (including 'runtime')
|
|
{
|
|
UB = AdjustedUB;
|
|
createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
|
|
Value *HasWork =
|
|
createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
Value *HasIteration =
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
Builder.getInt32(1), "polly.hasIteration");
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
Builder.SetInsertPoint(CheckNextBB);
|
|
HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
HasIteration =
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
Builder.getInt32(1), "polly.hasWork");
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
"polly.indvar.LB");
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
"polly.indvar.UB");
|
|
}
|
|
break;
|
|
case OMPGeneralSchedulingType::StaticChunked:
|
|
case OMPGeneralSchedulingType::StaticNonChunked:
|
|
// "STATIC" scheduling types are handled below
|
|
{
|
|
Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
|
|
createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
|
|
|
|
Value *ChunkedStride = Builder.CreateAlignedLoad(
|
|
LongType, StridePtr, Alignment, "polly.kmpc.stride");
|
|
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
"polly.indvar.LB");
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
"polly.indvar.UB.temp");
|
|
|
|
Value *UBInRange =
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
|
|
"polly.indvar.UB.inRange");
|
|
UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
|
|
Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
|
|
Value *HasIteration = Builder.CreateICmp(
|
|
llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
|
|
Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
|
LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
"polly.indvar.LB.entry");
|
|
UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
"polly.indvar.UB.entry");
|
|
}
|
|
|
|
Builder.SetInsertPoint(CheckNextBB);
|
|
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
Value *NextLB =
|
|
Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
|
|
Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
|
|
|
|
Value *NextUBOutOfBounds =
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
|
|
AdjustedUB, "polly.indvar.nextUB.outOfBounds");
|
|
NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
|
|
"polly.indvar.nextUB");
|
|
|
|
Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
|
|
Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
|
|
|
|
Value *HasWork =
|
|
Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
|
|
AdjustedUB, "polly.hasWork");
|
|
Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
|
|
} else {
|
|
Builder.CreateBr(ExitBB);
|
|
}
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
|
}
|
|
break;
|
|
}
|
|
|
|
Builder.CreateBr(CheckNextBB);
|
|
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
|
|
BasicBlock *AfterBB;
|
|
Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, LI, DT, AfterBB,
|
|
ICmpInst::ICMP_SLE, nullptr, true,
|
|
/* UseGuard */ false);
|
|
|
|
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
|
|
|
|
// Add code to terminate this subfunction.
|
|
Builder.SetInsertPoint(ExitBB);
|
|
// Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
|
|
if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
|
|
Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
|
|
createCallStaticFini(ID);
|
|
}
|
|
Builder.CreateRetVoid();
|
|
Builder.SetInsertPoint(&*LoopBody);
|
|
|
|
return std::make_tuple(IV, SubFn);
|
|
}
|
|
|
|
/// Emit a call to the OpenMP runtime function __kmpc_global_thread_num.
///
/// The function is declared in the module on first use. The call takes the
/// source location dummy as its only argument and carries the generated
/// debug location.
///
/// @return The emitted call (an i32 value).
Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
  const char *const RuntimeFnName = "__kmpc_global_thread_num";
  Function *ThreadNumFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (ThreadNumFn == nullptr) {
    StructType *LocTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    Type *ParamTys[] = {LocTy->getPointerTo()};
    FunctionType *FnTy =
        FunctionType::get(Builder.getInt32Ty(), ParamTys, false);
    ThreadNumFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  Value *CallArgs[] = {SourceLocationInfo};
  CallInst *ThreadNumCall = Builder.CreateCall(ThreadNumFn, CallArgs);
  ThreadNumCall->setDebugLoc(DLGenerated);
  return ThreadNumCall;
}
|
|
|
|
/// Emit a call to the OpenMP runtime function __kmpc_push_num_threads.
///
/// The function is declared in the module on first use. The call carries the
/// generated debug location.
///
/// @param GlobalThreadID Value passed as the second call argument.
/// @param NumThreads     Value passed as the third call argument.
void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
                                                        Value *NumThreads) {
  const char *const RuntimeFnName = "__kmpc_push_num_threads";
  Function *PushThreadsFn = M->getFunction(RuntimeFnName);

  // Declare the runtime function if the module does not know it yet.
  if (PushThreadsFn == nullptr) {
    StructType *LocTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    Type *ParamTys[] = {LocTy->getPointerTo(), Builder.getInt32Ty(),
                        Builder.getInt32Ty()};
    FunctionType *FnTy =
        FunctionType::get(Builder.getVoidTy(), ParamTys, false);
    PushThreadsFn =
        Function::Create(FnTy, Function::ExternalLinkage, RuntimeFnName, M);
  }

  Value *CallArgs[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
  CallInst *PushCall = Builder.CreateCall(PushThreadsFn, CallArgs);
  PushCall->setDebugLoc(DLGenerated);
}
|
|
|
|
/// Emit a call to __kmpc_for_static_init_8 (when LongType is 64 bits wide)
/// or __kmpc_for_static_init_4 (otherwise).
///
/// The runtime function is declared in the module on first use. The call
/// carries the generated debug location.
///
/// @param GlobalThreadID Value passed as the thread ID argument.
/// @param IsLastPtr      Pointer passed as the fourth argument.
/// @param LBPtr          Pointer passed as the fifth argument.
/// @param UBPtr          Pointer passed as the sixth argument.
/// @param StridePtr      Pointer passed as the seventh argument.
/// @param ChunkSize      Value passed as the last argument.
void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
                                                    Value *IsLastPtr,
                                                    Value *LBPtr, Value *UBPtr,
                                                    Value *StridePtr,
                                                    Value *ChunkSize) {
  const std::string Name =
      is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it. The ident_t type is only needed for
  // the declaration, so it is looked up here rather than on every call.
  if (!F) {
    StructType *IdentTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;

    Type *Params[] = {IdentTy->getPointerTo(),
                      Builder.getInt32Ty(),
                      Builder.getInt32Ty(),
                      Builder.getInt32Ty()->getPointerTo(),
                      LongType->getPointerTo(),
                      LongType->getPointerTo(),
                      LongType->getPointerTo(),
                      LongType,
                      LongType};

    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  // The effective scheduling type is passed to the runtime as an i32.
  const int SchedKind =
      static_cast<int>(getSchedType(PollyChunkSize, PollyScheduling));

  // The parameter 'ChunkSize' will hold strictly positive integer values,
  // regardless of PollyChunkSize's value
  Value *Args[] = {
      SourceLocationInfo,
      GlobalThreadID,
      Builder.getInt32(SchedKind),
      IsLastPtr,
      LBPtr,
      UBPtr,
      StridePtr,
      ConstantInt::get(LongType, 1),
      ChunkSize};

  CallInst *Call = Builder.CreateCall(F, Args);
  Call->setDebugLoc(DLGenerated);
}
|
|
|
|
/// Emit a call to the OpenMP runtime function __kmpc_for_static_fini.
///
/// The runtime function is declared in the module on first use. The call
/// carries the generated debug location.
///
/// @param GlobalThreadID Value passed as the thread ID argument.
void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
  const std::string Name = "__kmpc_for_static_fini";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it. The ident_t type is only needed for
  // the declaration, so it is looked up here rather than on every call.
  if (!F) {
    StructType *IdentTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
    Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()};
    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  Value *Args[] = {SourceLocationInfo, GlobalThreadID};

  CallInst *Call = Builder.CreateCall(F, Args);
  Call->setDebugLoc(DLGenerated);
}
|
|
|
|
/// Emit a call to __kmpc_dispatch_init_8 (when LongType is 64 bits wide) or
/// __kmpc_dispatch_init_4 (otherwise).
///
/// The runtime function is declared in the module on first use. The call
/// carries the generated debug location.
///
/// @param GlobalThreadID Value passed as the thread ID argument.
/// @param LB             Value passed as the lower bound argument.
/// @param UB             Value passed as the upper bound argument.
/// @param Inc            Value passed as the increment argument.
/// @param ChunkSize      Value passed as the last argument.
void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
                                                      Value *LB, Value *UB,
                                                      Value *Inc,
                                                      Value *ChunkSize) {
  const std::string Name =
      is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it. The ident_t type is only needed for
  // the declaration, so it is looked up here rather than on every call.
  if (!F) {
    StructType *IdentTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;

    Type *Params[] = {IdentTy->getPointerTo(),
                      Builder.getInt32Ty(),
                      Builder.getInt32Ty(),
                      LongType,
                      LongType,
                      LongType,
                      LongType};

    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  // The effective scheduling type is passed to the runtime as an i32.
  const int SchedKind =
      static_cast<int>(getSchedType(PollyChunkSize, PollyScheduling));

  // The parameter 'ChunkSize' will hold strictly positive integer values,
  // regardless of PollyChunkSize's value
  Value *Args[] = {
      SourceLocationInfo,
      GlobalThreadID,
      Builder.getInt32(SchedKind),
      LB,
      UB,
      Inc,
      ChunkSize};

  CallInst *Call = Builder.CreateCall(F, Args);
  Call->setDebugLoc(DLGenerated);
}
|
|
|
|
/// Emit a call to __kmpc_dispatch_next_8 (when LongType is 64 bits wide) or
/// __kmpc_dispatch_next_4 (otherwise).
///
/// The runtime function is declared in the module on first use. The call
/// carries the generated debug location.
///
/// @param GlobalThreadID Value passed as the thread ID argument.
/// @param IsLastPtr      Pointer passed as the third argument.
/// @param LBPtr          Pointer passed as the fourth argument.
/// @param UBPtr          Pointer passed as the fifth argument.
/// @param StridePtr      Pointer passed as the sixth argument.
/// @return The emitted call (an i32 value).
Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
                                                        Value *IsLastPtr,
                                                        Value *LBPtr,
                                                        Value *UBPtr,
                                                        Value *StridePtr) {
  const std::string Name =
      is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it. The ident_t type is only needed for
  // the declaration, so it is looked up here rather than on every call.
  if (!F) {
    StructType *IdentTy =
        StructType::getTypeByName(M->getContext(), "struct.ident_t");
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;

    Type *Params[] = {IdentTy->getPointerTo(),
                      Builder.getInt32Ty(),
                      Builder.getInt32Ty()->getPointerTo(),
                      LongType->getPointerTo(),
                      LongType->getPointerTo(),
                      LongType->getPointerTo()};

    FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
                   StridePtr};

  CallInst *Call = Builder.CreateCall(F, Args);
  Call->setDebugLoc(DLGenerated);
  return Call;
}
|
|
|
|
// TODO: This function currently creates a source location dummy. It might be
|
|
// necessary to (actually) provide information, in the future.
|
|
/// Return the module's dummy source location global (".loc.dummy"),
/// creating it on first use.
///
/// The global is a private constant of the named struct type
/// "struct.ident_t" ({ i32, i32, i32, i32, i8* }); the type is created if the
/// context does not have it yet. Its four integer members are zero and its
/// last member points to the private string global ".str.ident".
///
/// @return The existing or newly created dummy source location.
GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
  const std::string LocName = ".loc.dummy";

  // The dummy is created with private linkage below. The single-argument
  // Module::getGlobalVariable() ignores globals with local linkage, so the
  // lookup has to allow internal symbols explicitly; otherwise a fresh copy
  // would be created on every call.
  GlobalVariable *SourceLocDummy =
      M->getGlobalVariable(LocName, /* AllowInternal */ true);
  if (SourceLocDummy != nullptr)
    return SourceLocDummy;

  const std::string StructName = "struct.ident_t";
  StructType *IdentTy = StructType::getTypeByName(M->getContext(), StructName);

  // If the ident_t StructType is not available, declare it.
  // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
  if (!IdentTy) {
    Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
                          Builder.getInt32Ty(), Builder.getInt32Ty(),
                          Builder.getInt8PtrTy()};

    IdentTy =
        StructType::create(M->getContext(), LocMembers, StructName, false);
  }

  // Build the string initializer first so the global's array type (including
  // the terminating null) is derived from it instead of a hard-coded length.
  Constant *InitStr = ConstantDataArray::getString(
      M->getContext(), "Source location dummy.", true);
  Type *StrTy = InitStr->getType();

  // Global Variable Definitions
  GlobalVariable *StrVar = new GlobalVariable(
      *M, StrTy, true, GlobalValue::PrivateLinkage, nullptr, ".str.ident");
  StrVar->setAlignment(llvm::Align(1));

  SourceLocDummy = new GlobalVariable(
      *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
  SourceLocDummy->setAlignment(llvm::Align(8));

  // Constant Definitions
  // All GEP operands are constants, so the result is expected to be a
  // constant; cast<> asserts this instead of blindly reinterpreting.
  Constant *StrPtr = cast<Constant>(Builder.CreateInBoundsGEP(
      StrTy, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));

  Constant *LocInitStruct = ConstantStruct::get(
      IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
                Builder.getInt32(0), StrPtr});

  // Initialize variables
  StrVar->setInitializer(InitStr);
  SourceLocDummy->setInitializer(LocInitStruct);

  return SourceLocDummy;
}
|
|
|
|
bool ParallelLoopGeneratorKMP::is64BitArch() {
|
|
return (LongType->getIntegerBitWidth() == 64);
|
|
}
|
|
|
|
/// Determine the effective scheduling type.
///
/// Static chunked scheduling with a chunk size of zero is mapped to static
/// non-chunked scheduling; every other combination is returned unchanged.
///
/// @param ChunkSize  The requested chunk size.
/// @param Scheduling The requested scheduling type.
/// @return The scheduling type to use.
OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
    int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
  const bool ChunkedWithoutChunk =
      Scheduling == OMPGeneralSchedulingType::StaticChunked && ChunkSize == 0;

  return ChunkedWithoutChunk ? OMPGeneralSchedulingType::StaticNonChunked
                             : Scheduling;
}
|