Compare commits
14 Commits
main
...
users/skat
Author | SHA1 | Date | |
---|---|---|---|
![]() |
bdd6b36a93 | ||
![]() |
2d33426786 | ||
![]() |
38a38bb056 | ||
![]() |
e57c671e6f | ||
![]() |
bd81572292 | ||
![]() |
be955670bb | ||
![]() |
06a2570f2b | ||
![]() |
4da11cfd7d | ||
![]() |
688b61435b | ||
![]() |
1b7dd6ce24 | ||
![]() |
5a73674be8 | ||
![]() |
94936248fd | ||
![]() |
9e948a58af | ||
![]() |
39800face1 |
@ -10500,8 +10500,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
|
||||
llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
|
||||
cantFail(OMPBuilder.createTargetData(
|
||||
OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
|
||||
CustomMapperCB,
|
||||
OmpLoc, AllocaIP, CodeGenIP, /*DeallocIPs=*/{}, DeviceID, IfCondVal,
|
||||
Info, GenMapInfoCB, CustomMapperCB,
|
||||
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
|
||||
CGF.Builder.restoreIP(AfterIP);
|
||||
}
|
||||
|
@ -1835,10 +1835,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
||||
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
|
||||
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
|
||||
|
||||
auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
|
||||
*this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
|
||||
*this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
@ -1846,9 +1846,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
|
||||
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
|
||||
OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
|
||||
IfCond, NumThreads, ProcBind, S.hasCancel()));
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
|
||||
cantFail(OMPBuilder.createParallel(
|
||||
Builder, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, PrivCB, FiniCB,
|
||||
IfCond, NumThreads, ProcBind, S.hasCancel()));
|
||||
Builder.restoreIP(AfterIP);
|
||||
return;
|
||||
}
|
||||
@ -4361,21 +4362,23 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
||||
llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
|
||||
if (CS) {
|
||||
for (const Stmt *SubStmt : CS->children()) {
|
||||
auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, SubStmt, AllocaIP, CodeGenIP, "section");
|
||||
auto SectionCB = [this, SubStmt](InsertPointTy AllocIP,
|
||||
InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP,
|
||||
CodeGenIP, "section");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
SectionCBVector.push_back(SectionCB);
|
||||
}
|
||||
} else {
|
||||
auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, CapturedStmt, AllocaIP, CodeGenIP, "section");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
auto SectionCB =
|
||||
[this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, CapturedStmt, AllocIP, CodeGenIP, "section");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
SectionCBVector.push_back(SectionCB);
|
||||
}
|
||||
|
||||
@ -4429,10 +4432,11 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [SectionRegionBodyStmt,
|
||||
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
|
||||
*this, SectionRegionBodyStmt, AllocIP, CodeGenIP, "section");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
@ -4514,10 +4518,11 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [MasterRegionBodyStmt,
|
||||
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
|
||||
*this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
@ -4564,10 +4569,11 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [MaskedRegionBodyStmt,
|
||||
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
|
||||
*this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
@ -4607,10 +4613,11 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [CriticalRegionBodyStmt,
|
||||
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
|
||||
*this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical");
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
@ -5577,8 +5584,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
|
||||
InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
|
||||
AllocaInsertPt->getIterator());
|
||||
|
||||
auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
|
||||
return llvm::Error::success();
|
||||
@ -5587,7 +5594,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
|
||||
if (!CapturedStmtInfo)
|
||||
CapturedStmtInfo = &CapStmtInfo;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
|
||||
cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
|
||||
cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP,
|
||||
/*DeallocIPs=*/{}, BodyGenCB));
|
||||
Builder.restoreIP(AfterIP);
|
||||
return;
|
||||
}
|
||||
@ -6167,8 +6175,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&S, C, this](InsertPointTy AllocIP,
|
||||
InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
|
||||
const CapturedStmt *CS = S.getInnermostCapturedStmt();
|
||||
@ -6186,7 +6195,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
||||
OutlinedFn, CapturedVars);
|
||||
} else {
|
||||
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
|
||||
*this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
|
||||
*this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered");
|
||||
}
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
namespace llvm {
|
||||
class CanonicalLoopInfo;
|
||||
class CodeExtractor;
|
||||
class ScanInfo;
|
||||
struct TargetRegionEntryInfo;
|
||||
class OffloadEntriesInfoManager;
|
||||
@ -601,17 +602,19 @@ public:
|
||||
/// such InsertPoints need to be preserved, it can split the block itself
|
||||
/// before calling the callback.
|
||||
///
|
||||
/// AllocaIP and CodeGenIP must not point to the same position.
|
||||
///
|
||||
/// \param AllocaIP is the insertion point at which new alloca instructions
|
||||
/// should be placed. The BasicBlock it is pointing to must
|
||||
/// not be split.
|
||||
/// \param CodeGenIP is the insertion point at which the body code should be
|
||||
/// placed.
|
||||
/// AllocIP and CodeGenIP must not point to the same position.
|
||||
///
|
||||
/// \param AllocIP is the insertion point at which new allocations should
|
||||
/// be placed. The BasicBlock it is pointing to must not be
|
||||
/// split.
|
||||
/// \param CodeGenIP is the insertion point at which the body code should be
|
||||
/// placed.
|
||||
/// \param DeallocIPs is the list of insertion points where explicit
|
||||
/// deallocations, if needed, should be placed.
|
||||
/// \return an error, if any were triggered during execution.
|
||||
using BodyGenCallbackTy =
|
||||
function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
|
||||
function_ref<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs)>;
|
||||
|
||||
// This is created primarily for sections construct as llvm::function_ref
|
||||
// (BodyGenCallbackTy) is not storable (as described in the comments of
|
||||
@ -620,7 +623,8 @@ public:
|
||||
///
|
||||
/// \return an error, if any were triggered during execution.
|
||||
using StorableBodyGenCallbackTy =
|
||||
std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
|
||||
std::function<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs)>;
|
||||
|
||||
/// Callback type for loop body code generation.
|
||||
///
|
||||
@ -714,7 +718,9 @@ public:
|
||||
/// Generator for '#omp parallel'
|
||||
///
|
||||
/// \param Loc The insert and source location description.
|
||||
/// \param AllocaIP The insertion points to be used for alloca instructions.
|
||||
/// \param AllocIP The insertion point to be used for allocations.
|
||||
/// \param DeallocIPs The insertion points to be used for explicit
|
||||
/// deallocations, if needed.
|
||||
/// \param BodyGenCB Callback that will generate the region code.
|
||||
/// \param PrivCB Callback to copy a given variable (think copy constructor).
|
||||
/// \param FiniCB Callback to finalize variable copies.
|
||||
@ -725,10 +731,10 @@ public:
|
||||
///
|
||||
/// \returns The insertion position *after* the parallel.
|
||||
LLVM_ABI InsertPointOrErrorTy createParallel(
|
||||
const LocationDescription &Loc, InsertPointTy AllocaIP,
|
||||
BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
|
||||
FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
|
||||
omp::ProcBindKind ProcBind, bool IsCancellable);
|
||||
const LocationDescription &Loc, InsertPointTy AllocIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB,
|
||||
PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
|
||||
Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable);
|
||||
|
||||
/// Generator for the control flow structure of an OpenMP canonical loop.
|
||||
///
|
||||
@ -1346,7 +1352,9 @@ public:
|
||||
/// Generator for `#omp task`
|
||||
///
|
||||
/// \param Loc The location where the task construct was encountered.
|
||||
/// \param AllocaIP The insertion point to be used for alloca instructions.
|
||||
/// \param AllocIP The insertion point to be used for allocations.
|
||||
/// \param DeallocIPs The insertion points to be used for explicit
|
||||
/// deallocations, if needed.
|
||||
/// \param BodyGenCB Callback that will generate the region code.
|
||||
/// \param Tied True if the task is tied, false if the task is untied.
|
||||
/// \param Final i1 value which is `true` if the task is final, `false` if the
|
||||
@ -1362,21 +1370,23 @@ public:
|
||||
/// \param Mergeable If the given task is `mergeable`
|
||||
/// \param Priority `priority-value` specifies the execution order of the
|
||||
/// tasks that are generated by the construct
|
||||
LLVM_ABI InsertPointOrErrorTy
|
||||
createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
|
||||
BodyGenCallbackTy BodyGenCB, bool Tied = true,
|
||||
Value *Final = nullptr, Value *IfCondition = nullptr,
|
||||
SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
|
||||
Value *EventHandle = nullptr, Value *Priority = nullptr);
|
||||
LLVM_ABI InsertPointOrErrorTy createTask(
|
||||
const LocationDescription &Loc, InsertPointTy AllocIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB,
|
||||
bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr,
|
||||
SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
|
||||
Value *EventHandle = nullptr, Value *Priority = nullptr);
|
||||
|
||||
/// Generator for the taskgroup construct
|
||||
///
|
||||
/// \param Loc The location where the taskgroup construct was encountered.
|
||||
/// \param AllocaIP The insertion point to be used for alloca instructions.
|
||||
/// \param AllocIP The insertion point to be used for allocations.
|
||||
/// \param DeallocIPs The insertion points to be used for explicit deallocation
|
||||
/// instructions, if needed.
|
||||
/// \param BodyGenCB Callback that will generate the region code.
|
||||
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc,
|
||||
InsertPointTy AllocaIP,
|
||||
BodyGenCallbackTy BodyGenCB);
|
||||
LLVM_ABI InsertPointOrErrorTy createTaskgroup(
|
||||
const LocationDescription &Loc, InsertPointTy AllocIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB);
|
||||
|
||||
using FileIdentifierInfoCallbackTy =
|
||||
std::function<std::tuple<std::string, uint64_t>()>;
|
||||
@ -2245,20 +2255,31 @@ public:
|
||||
struct OutlineInfo {
|
||||
using PostOutlineCBTy = std::function<void(Function &)>;
|
||||
PostOutlineCBTy PostOutlineCB;
|
||||
BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
|
||||
BasicBlock *EntryBB, *ExitBB, *OuterAllocBB;
|
||||
SmallVector<BasicBlock *> OuterDeallocBBs;
|
||||
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
|
||||
|
||||
LLVM_ABI virtual ~OutlineInfo() = default;
|
||||
|
||||
/// Collect all blocks in between EntryBB and ExitBB in both the given
|
||||
/// vector and set.
|
||||
LLVM_ABI void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
|
||||
SmallVectorImpl<BasicBlock *> &BlockVector);
|
||||
|
||||
/// Create a CodeExtractor instance based on the information stored in this
|
||||
/// structure, the list of collected blocks from a previous call to
|
||||
/// \c collectBlocks and a flag stating whether arguments must be passed in
|
||||
/// address space 0.
|
||||
LLVM_ABI virtual std::unique_ptr<CodeExtractor>
|
||||
createCodeExtractor(ArrayRef<BasicBlock *> Blocks,
|
||||
bool ArgsInZeroAddressSpace, Twine Suffix = Twine(""));
|
||||
|
||||
/// Return the function that contains the region to be outlined.
|
||||
Function *getFunction() const { return EntryBB->getParent(); }
|
||||
};
|
||||
|
||||
/// Collection of regions that need to be outlined during finalization.
|
||||
SmallVector<OutlineInfo, 16> OutlineInfos;
|
||||
SmallVector<std::unique_ptr<OutlineInfo>, 16> OutlineInfos;
|
||||
|
||||
/// A collection of candidate target functions whose constant allocas will
|
||||
/// attempt to be raised on a call of finalize after all currently enqueued
|
||||
@ -2273,7 +2294,9 @@ public:
|
||||
std::forward_list<ScanInfo> ScanInfos;
|
||||
|
||||
/// Add a new region that will be outlined later.
|
||||
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
|
||||
void addOutlineInfo(std::unique_ptr<OutlineInfo> &&OI) {
|
||||
OutlineInfos.emplace_back(std::move(OI));
|
||||
}
|
||||
|
||||
/// An ordered map of auto-generated variables to their unique names.
|
||||
/// It stores variables with the following names: 1) ".gomp_critical_user_" +
|
||||
@ -2306,7 +2329,8 @@ public:
|
||||
/// \return an error, if any were triggered during execution.
|
||||
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
|
||||
BodyGenCallbackTy ElseGen,
|
||||
InsertPointTy AllocaIP = {});
|
||||
InsertPointTy AllocIP = {},
|
||||
ArrayRef<InsertPointTy> DeallocIPs = {});
|
||||
|
||||
/// Create the global variable holding the offload mappings information.
|
||||
LLVM_ABI GlobalVariable *
|
||||
@ -2861,11 +2885,13 @@ public:
|
||||
/// Generator for `#omp distribute`
|
||||
///
|
||||
/// \param Loc The location where the distribute construct was encountered.
|
||||
/// \param AllocaIP The insertion points to be used for alloca instructions.
|
||||
/// \param AllocIP The insertion point to be used for allocations.
|
||||
/// \param DeallocIPs The insertion points to be used for explicit
|
||||
/// deallocations, if needed.
|
||||
/// \param BodyGenCB Callback that will generate the region code.
|
||||
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc,
|
||||
InsertPointTy AllocaIP,
|
||||
BodyGenCallbackTy BodyGenCB);
|
||||
LLVM_ABI InsertPointOrErrorTy createDistribute(
|
||||
const LocationDescription &Loc, InsertPointTy AllocIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB);
|
||||
|
||||
/// Generate conditional branch and relevant BasicBlocks through which private
|
||||
/// threads copy the 'copyin' variables from Master copy to threadprivate
|
||||
@ -2907,6 +2933,29 @@ public:
|
||||
LLVM_ABI CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
|
||||
Value *Allocator, std::string Name = "");
|
||||
|
||||
/// Create a runtime call for kmpc_alloc_shared.
|
||||
///
|
||||
/// \param Loc The insert and source location description.
|
||||
/// \param VarType Type of variable to be allocated.
|
||||
/// \param Name Name of call Instruction.
|
||||
///
|
||||
/// \returns CallInst to the kmpc_alloc_shared call.
|
||||
LLVM_ABI CallInst *createOMPAllocShared(const LocationDescription &Loc,
|
||||
Type *VarType,
|
||||
const Twine &Name = Twine(""));
|
||||
|
||||
/// Create a runtime call for kmpc_free_shared.
|
||||
///
|
||||
/// \param Loc The insert and source location description.
|
||||
/// \param Addr Value obtained from the corresponding kmpc_alloc_shared call.
|
||||
/// \param VarType Type of variable to be freed.
|
||||
/// \param Name Name of call Instruction.
|
||||
///
|
||||
/// \returns CallInst to the kmpc_free_shared call.
|
||||
LLVM_ABI CallInst *createOMPFreeShared(const LocationDescription &Loc,
|
||||
Value *Addr, Type *VarType,
|
||||
const Twine &Name = Twine(""));
|
||||
|
||||
/// Create a runtime call for kmpc_threadprivate_cached
|
||||
///
|
||||
/// \param Loc The insert and source location description.
|
||||
@ -3170,9 +3219,11 @@ public:
|
||||
/// Generator for '#omp target data'
|
||||
///
|
||||
/// \param Loc The location where the target data construct was encountered.
|
||||
/// \param AllocaIP The insertion points to be used for alloca instructions.
|
||||
/// \param AllocIP The insertion point to be used for allocations.
|
||||
/// \param CodeGenIP The insertion point at which the target directive code
|
||||
/// should be placed.
|
||||
/// \param DeallocIPs The insertion points at which explicit deallocations
|
||||
/// should be placed, if needed.
|
||||
/// \param IsBegin If true then emits begin mapper call otherwise emits
|
||||
/// end mapper call.
|
||||
/// \param DeviceID Stores the DeviceID from the device clause.
|
||||
@ -3185,10 +3236,10 @@ public:
|
||||
/// \param DeviceAddrCB Optional callback to generate code related to
|
||||
/// use_device_ptr and use_device_addr.
|
||||
LLVM_ABI InsertPointOrErrorTy createTargetData(
|
||||
const LocationDescription &Loc, InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
|
||||
TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
|
||||
CustomMapperCallbackTy CustomMapperCB,
|
||||
const LocationDescription &Loc, InsertPointTy AllocIP,
|
||||
InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs,
|
||||
Value *DeviceID, Value *IfCond, TargetDataInfo &Info,
|
||||
GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB,
|
||||
omp::RuntimeFunction *MapperFunc = nullptr,
|
||||
function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
|
||||
BodyGenTy BodyGenType)>
|
||||
@ -3197,7 +3248,8 @@ public:
|
||||
Value *SrcLocInfo = nullptr);
|
||||
|
||||
using TargetBodyGenCallbackTy = function_ref<InsertPointOrErrorTy(
|
||||
InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
|
||||
InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs)>;
|
||||
|
||||
using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointOrErrorTy(
|
||||
Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
|
||||
@ -3209,6 +3261,8 @@ public:
|
||||
/// \param IsOffloadEntry whether it is an offload entry.
|
||||
/// \param CodeGenIP The insertion point where the call to the outlined
|
||||
/// function should be emitted.
|
||||
/// \param DeallocIPs The insertion points at which explicit deallocations
|
||||
/// should be placed, if needed.
|
||||
/// \param Info Stores all information related to the Target directive.
|
||||
/// \param EntryInfo The entry information about the function.
|
||||
/// \param DefaultAttrs Structure containing the default attributes, including
|
||||
@ -3229,8 +3283,9 @@ public:
|
||||
/// not.
|
||||
LLVM_ABI InsertPointOrErrorTy createTarget(
|
||||
const LocationDescription &Loc, bool IsOffloadEntry,
|
||||
OpenMPIRBuilder::InsertPointTy AllocaIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info,
|
||||
OpenMPIRBuilder::InsertPointTy AllocIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs, TargetDataInfo &Info,
|
||||
TargetRegionEntryInfo &EntryInfo,
|
||||
const TargetKernelDefaultAttrs &DefaultAttrs,
|
||||
const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond,
|
||||
|
@ -17,14 +17,15 @@
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include <limits>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
template <typename PtrType> class SmallPtrSetImpl;
|
||||
class AddrSpaceCastInst;
|
||||
class AllocaInst;
|
||||
class BasicBlock;
|
||||
class BlockFrequency;
|
||||
class BlockFrequencyInfo;
|
||||
class BranchProbabilityInfo;
|
||||
@ -94,15 +95,23 @@ public:
|
||||
BranchProbabilityInfo *BPI;
|
||||
AssumptionCache *AC;
|
||||
|
||||
// A block outside of the extraction set where any intermediate
|
||||
// allocations will be placed inside. If this is null, allocations
|
||||
// will be placed in the entry block of the function.
|
||||
/// A block outside of the extraction set where any intermediate
|
||||
/// allocations will be placed inside. If this is null, allocations
|
||||
/// will be placed in the entry block of the function.
|
||||
BasicBlock *AllocationBlock;
|
||||
|
||||
// If true, varargs functions can be extracted.
|
||||
/// A set of blocks outside of the extraction set where deallocations for
|
||||
/// intermediate allocations should be placed. Not used for automatically
|
||||
/// deallocated memory (e.g. `alloca`), which is the default.
|
||||
///
|
||||
/// If it is empty and needed, the end of the replacement basic block will
|
||||
/// be used to place deallocations.
|
||||
SmallVector<BasicBlock *> DeallocationBlocks;
|
||||
|
||||
/// If true, varargs functions can be extracted.
|
||||
bool AllowVarArgs;
|
||||
|
||||
// Bits of intermediate state computed at various phases of extraction.
|
||||
/// Bits of intermediate state computed at various phases of extraction.
|
||||
SetVector<BasicBlock *> Blocks;
|
||||
|
||||
/// Lists of blocks that are branched from the code region to be extracted,
|
||||
@ -124,13 +133,13 @@ public:
|
||||
/// returns 1, etc.
|
||||
SmallVector<BasicBlock *> ExtractedFuncRetVals;
|
||||
|
||||
// Suffix to use when creating extracted function (appended to the original
|
||||
// function name + "."). If empty, the default is to use the entry block
|
||||
// label, if non-empty, otherwise "extracted".
|
||||
/// Suffix to use when creating extracted function (appended to the original
|
||||
/// function name + "."). If empty, the default is to use the entry block
|
||||
/// label, if non-empty, otherwise "extracted".
|
||||
std::string Suffix;
|
||||
|
||||
// If true, the outlined function has aggregate argument in zero address
|
||||
// space.
|
||||
/// If true, the outlined function has aggregate argument in zero address
|
||||
/// space.
|
||||
bool ArgsInZeroAddressSpace;
|
||||
|
||||
public:
|
||||
@ -146,10 +155,12 @@ public:
|
||||
/// however code extractor won't validate whether extraction is legal.
|
||||
/// Any new allocations will be placed in the AllocationBlock, unless
|
||||
/// it is null, in which case it will be placed in the entry block of
|
||||
/// the function from which the code is being extracted.
|
||||
/// If ArgsInZeroAddressSpace param is set to true, then the aggregate
|
||||
/// param pointer of the outlined function is declared in zero address
|
||||
/// space.
|
||||
/// the function from which the code is being extracted. Explicit
|
||||
/// deallocations for the aforementioned allocations will be placed, if
|
||||
/// needed, in all blocks in DeallocationBlocks or the end of the
|
||||
/// replacement block. If ArgsInZeroAddressSpace param is set to true, then
|
||||
/// the aggregate param pointer of the outlined function is declared in zero
|
||||
/// address space.
|
||||
LLVM_ABI
|
||||
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
|
||||
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
|
||||
@ -157,8 +168,11 @@ public:
|
||||
AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
|
||||
bool AllowAlloca = false,
|
||||
BasicBlock *AllocationBlock = nullptr,
|
||||
ArrayRef<BasicBlock *> DeallocationBlocks = {},
|
||||
std::string Suffix = "", bool ArgsInZeroAddressSpace = false);
|
||||
|
||||
LLVM_ABI virtual ~CodeExtractor() = default;
|
||||
|
||||
/// Perform the extraction, returning the new function.
|
||||
///
|
||||
/// Returns zero when called on a CodeExtractor instance where isEligible
|
||||
@ -243,6 +257,19 @@ public:
|
||||
/// region, passing it instead as a scalar.
|
||||
LLVM_ABI void excludeArgFromAggregate(Value *Arg);
|
||||
|
||||
protected:
|
||||
/// Allocate an intermediate variable at the specified point.
|
||||
LLVM_ABI virtual Instruction *
|
||||
allocateVar(BasicBlock *BB, BasicBlock::iterator AllocIP, Type *VarType,
|
||||
const Twine &Name = Twine(""),
|
||||
AddrSpaceCastInst **CastedAlloc = nullptr);
|
||||
|
||||
/// Deallocate a previously-allocated intermediate variable at the specified
|
||||
/// point.
|
||||
LLVM_ABI virtual Instruction *deallocateVar(BasicBlock *BB,
|
||||
BasicBlock::iterator DeallocIP,
|
||||
Value *Var, Type *VarType);
|
||||
|
||||
private:
|
||||
struct LifetimeMarkerInfo {
|
||||
bool SinkLifeStart = false;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -721,6 +721,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
|
||||
SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr,
|
||||
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
|
||||
/* AllowAlloca */ false, /* AllocaBlock */ nullptr,
|
||||
/* DeallocationBlocks */ {},
|
||||
/* Suffix */ "cold." + std::to_string(OutlinedFunctionID));
|
||||
|
||||
if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) &&
|
||||
|
@ -2829,7 +2829,7 @@ unsigned IROutliner::doOutline(Module &M) {
|
||||
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
||||
OS->CE = new (ExtractorAllocator.Allocate())
|
||||
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
||||
false, nullptr, "outlined");
|
||||
false, nullptr, {}, "outlined");
|
||||
findAddInputsOutputs(M, *OS, NotSame);
|
||||
if (!OS->IgnoreRegion)
|
||||
OutlinedRegions.push_back(OS);
|
||||
@ -2940,7 +2940,7 @@ unsigned IROutliner::doOutline(Module &M) {
|
||||
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
||||
OS->CE = new (ExtractorAllocator.Allocate())
|
||||
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
||||
false, nullptr, "outlined");
|
||||
false, nullptr, {}, "outlined");
|
||||
bool FunctionOutlined = extractSection(*OS);
|
||||
if (FunctionOutlined) {
|
||||
unsigned StartIdx = OS->Candidate->getStartIdx();
|
||||
|
@ -1086,7 +1086,8 @@ private:
|
||||
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
|
||||
|
||||
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
BasicBlock *CGStartBB = CodeGenIP.getBlock();
|
||||
BasicBlock *CGEndBB =
|
||||
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
|
||||
@ -1126,7 +1127,8 @@ private:
|
||||
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
|
||||
ParentBB->getTerminator()->eraseFromParent();
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
BasicBlock *CGStartBB = CodeGenIP.getBlock();
|
||||
BasicBlock *CGEndBB =
|
||||
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
|
||||
@ -1256,8 +1258,9 @@ private:
|
||||
// avoid overriding binding settings, and without explicit cancellation.
|
||||
OpenMPIRBuilder::InsertPointTy AfterIP =
|
||||
cantFail(OMPInfoCache.OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
|
||||
OMP_PROC_BIND_default, /* IsCancellable */ false));
|
||||
Loc, AllocaIP, /* DeallocIPs */ {}, BodyGenCB, PrivCB, FiniCB,
|
||||
nullptr, nullptr, OMP_PROC_BIND_default,
|
||||
/* IsCancellable */ false));
|
||||
BranchInst::Create(AfterBB, AfterIP.getBlock());
|
||||
|
||||
// Perform the actual outlining.
|
||||
@ -5020,6 +5023,29 @@ struct AAKernelInfoCallSite : AAKernelInfo {
|
||||
case OMPRTL___kmpc_free_shared:
|
||||
// Return without setting a fixpoint, to be resolved in updateImpl.
|
||||
return;
|
||||
case OMPRTL___kmpc_distribute_static_loop_4:
|
||||
case OMPRTL___kmpc_distribute_static_loop_4u:
|
||||
case OMPRTL___kmpc_distribute_static_loop_8:
|
||||
case OMPRTL___kmpc_distribute_static_loop_8u:
|
||||
case OMPRTL___kmpc_distribute_for_static_loop_4:
|
||||
case OMPRTL___kmpc_distribute_for_static_loop_4u:
|
||||
case OMPRTL___kmpc_distribute_for_static_loop_8:
|
||||
case OMPRTL___kmpc_distribute_for_static_loop_8u:
|
||||
case OMPRTL___kmpc_for_static_loop_4:
|
||||
case OMPRTL___kmpc_for_static_loop_4u:
|
||||
case OMPRTL___kmpc_for_static_loop_8:
|
||||
case OMPRTL___kmpc_for_static_loop_8u:
|
||||
// Parallel regions might be reached by these calls, as they take a
|
||||
// callback argument potentially containing arbitrary user-provided
|
||||
// code.
|
||||
ReachedUnknownParallelRegions.insert(&CB);
|
||||
// TODO: The presence of these calls on their own does not prevent a
|
||||
// kernel from being SPMD-izable. We mark it as such because we need
|
||||
// further changes in order to also consider the contents of the
|
||||
// callbacks passed to them.
|
||||
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
|
||||
SPMDCompatibilityTracker.insert(&CB);
|
||||
break;
|
||||
default:
|
||||
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
|
||||
// generally. However, they do not hide parallel regions.
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -264,11 +263,12 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
|
||||
bool AggregateArgs, BlockFrequencyInfo *BFI,
|
||||
BranchProbabilityInfo *BPI, AssumptionCache *AC,
|
||||
bool AllowVarArgs, bool AllowAlloca,
|
||||
BasicBlock *AllocationBlock, std::string Suffix,
|
||||
bool ArgsInZeroAddressSpace)
|
||||
BasicBlock *AllocationBlock,
|
||||
ArrayRef<BasicBlock *> DeallocationBlocks,
|
||||
std::string Suffix, bool ArgsInZeroAddressSpace)
|
||||
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
|
||||
BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
|
||||
AllowVarArgs(AllowVarArgs),
|
||||
DeallocationBlocks(DeallocationBlocks), AllowVarArgs(AllowVarArgs),
|
||||
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
|
||||
Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {}
|
||||
|
||||
@ -444,6 +444,27 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
|
||||
return CommonExitBlock;
|
||||
}
|
||||
|
||||
/// Allocate storage for an intermediate variable used by the extracted call.
///
/// Creates an alloca of \p VarType named \p Name at \p AllocIP, in the alloca
/// address space reported by the module's data layout.
///
/// \param BB          Used only to reach the module's DataLayout and the
///                    LLVMContext; the insertion position comes from AllocIP.
/// \param AllocIP     Position at which the alloca is inserted.
/// \param VarType     Type of the allocated variable.
/// \param Name        Name of the alloca; the optional cast is named
///                    Name + ".ascast".
/// \param CastedAlloc Optional out-parameter. It is written only when it is
///                    non-null, ArgsInZeroAddressSpace is set and the alloca
///                    address space is not 0; in every other case it is left
///                    untouched, so callers should initialize the pointee
///                    (e.g. to nullptr) before the call.
/// \returns The newly created alloca instruction (never the cast).
Instruction *CodeExtractor::allocateVar(BasicBlock *BB,
|
||||
BasicBlock::iterator AllocIP,
|
||||
Type *VarType, const Twine &Name,
|
||||
AddrSpaceCastInst **CastedAlloc) {
|
||||
const DataLayout &DL = BB->getModule()->getDataLayout();
|
||||
// The nullptr argument is the array-size operand, i.e. a single element is
// allocated (per the AllocaInst constructor signature).
Instruction *Alloca =
|
||||
new AllocaInst(VarType, DL.getAllocaAddrSpace(), nullptr, Name, AllocIP);
|
||||
|
||||
// Only materialize an address-space-0 view of the alloca when the caller
// asked for it and the alloca does not already live in address space 0.
if (CastedAlloc && ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
|
||||
*CastedAlloc = new AddrSpaceCastInst(
|
||||
Alloca, PointerType::get(BB->getContext(), 0), Name + ".ascast");
|
||||
// The cast is created detached and then placed directly after the alloca.
(*CastedAlloc)->insertAfter(Alloca->getIterator());
|
||||
}
|
||||
return Alloca;
|
||||
}
|
||||
|
||||
/// Counterpart of allocateVar for storage that needs explicit deallocation.
///
/// This implementation ignores all four arguments (they are unnamed), emits
/// nothing and returns nullptr.
/// NOTE(review): presumably this is a hook meant to be overridden where
/// explicit deallocation is required — confirm against the class declaration.
Instruction *CodeExtractor::deallocateVar(BasicBlock *, BasicBlock::iterator,
|
||||
Value *, Type *) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Find the pair of lifetime markers for address 'Addr' that are either
|
||||
// defined inside the outline region or can legally be shrinkwrapped into the
|
||||
// outline region. If there are no other untracked uses of the address, return
|
||||
@ -1819,7 +1840,6 @@ CallInst *CodeExtractor::emitReplacerCall(
|
||||
std::vector<Value *> &Reloads) {
|
||||
LLVMContext &Context = oldFunction->getContext();
|
||||
Module *M = oldFunction->getParent();
|
||||
const DataLayout &DL = M->getDataLayout();
|
||||
|
||||
// This takes the place of the original loop
|
||||
BasicBlock *codeReplacer =
|
||||
@ -1850,25 +1870,22 @@ CallInst *CodeExtractor::emitReplacerCall(
|
||||
if (StructValues.contains(output))
|
||||
continue;
|
||||
|
||||
AllocaInst *alloca = new AllocaInst(
|
||||
output->getType(), DL.getAllocaAddrSpace(), nullptr,
|
||||
output->getName() + ".loc", AllocaBlock->getFirstInsertionPt());
|
||||
params.push_back(alloca);
|
||||
ReloadOutputs.push_back(alloca);
|
||||
Value *OutAlloc =
|
||||
allocateVar(AllocaBlock, AllocaBlock->getFirstInsertionPt(),
|
||||
output->getType(), output->getName() + ".loc");
|
||||
params.push_back(OutAlloc);
|
||||
ReloadOutputs.push_back(OutAlloc);
|
||||
}
|
||||
|
||||
AllocaInst *Struct = nullptr;
|
||||
Instruction *Struct = nullptr;
|
||||
if (!StructValues.empty()) {
|
||||
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
|
||||
"structArg", AllocaBlock->getFirstInsertionPt());
|
||||
if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
|
||||
auto *StructSpaceCast = new AddrSpaceCastInst(
|
||||
Struct, PointerType ::get(Context, 0), "structArg.ascast");
|
||||
StructSpaceCast->insertAfter(Struct->getIterator());
|
||||
AddrSpaceCastInst *StructSpaceCast = nullptr;
|
||||
Struct = allocateVar(AllocaBlock, AllocaBlock->getFirstInsertionPt(),
|
||||
StructArgTy, "structArg", &StructSpaceCast);
|
||||
if (StructSpaceCast)
|
||||
params.push_back(StructSpaceCast);
|
||||
} else {
|
||||
else
|
||||
params.push_back(Struct);
|
||||
}
|
||||
|
||||
unsigned AggIdx = 0;
|
||||
for (Value *input : inputs) {
|
||||
@ -2011,6 +2028,27 @@ CallInst *CodeExtractor::emitReplacerCall(
|
||||
insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), LifetimesStart,
|
||||
{}, call);
|
||||
|
||||
// Deallocate intermediate variables if they need explicit deallocation.
|
||||
// Calls deallocateVar at (DeallocBlock, DeallocIP) for every output that is
// passed outside the aggregate struct, and then once for the struct argument
// itself when one was allocated.
auto deallocVars = [&](BasicBlock *DeallocBlock,
|
||||
BasicBlock::iterator DeallocIP) {
|
||||
// Index walks ReloadOutputs, which is advanced only for outputs that are not
// in StructValues.
// NOTE(review): this assumes ReloadOutputs holds exactly one entry per
// non-struct output, in 'outputs' order — confirm against the allocation loop
// earlier in this function.
int Index = 0;
|
||||
for (Value *Output : outputs) {
|
||||
if (!StructValues.contains(Output))
|
||||
deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++],
|
||||
Output->getType());
|
||||
}
|
||||
|
||||
// Struct stays null when no values are aggregated, so nothing is released.
if (Struct)
|
||||
deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy);
|
||||
};
|
||||
|
||||
if (DeallocationBlocks.empty()) {
|
||||
deallocVars(codeReplacer, codeReplacer->end());
|
||||
} else {
|
||||
for (BasicBlock *DeallocationBlock : DeallocationBlocks)
|
||||
deallocVars(DeallocationBlock, DeallocationBlock->getFirstInsertionPt());
|
||||
}
|
||||
|
||||
return call;
|
||||
}
|
||||
|
||||
|
@ -55,8 +55,9 @@ using namespace omp;
|
||||
}
|
||||
|
||||
#define BODYGENCB_WRAPPER(cb) \
|
||||
[&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \
|
||||
cb(AllocaIP, CodeGenIP); \
|
||||
[&cb](InsertPointTy AllocIP, InsertPointTy CodeGenIP, \
|
||||
ArrayRef<InsertPointTy> DeallocIPs) -> Error { \
|
||||
cb(AllocIP, CodeGenIP, DeallocIPs); \
|
||||
return Error::success(); \
|
||||
}
|
||||
|
||||
@ -664,10 +665,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
|
||||
unsigned NumPrivatizedVars = 0;
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumBodiesGenerated;
|
||||
|
||||
Builder.restoreIP(AllocaIP);
|
||||
Builder.restoreIP(AllocIP);
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
Builder.CreateStore(F->arg_begin(), PrivAI);
|
||||
|
||||
@ -715,8 +717,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
|
||||
nullptr, nullptr, OMP_PROC_BIND_default, false));
|
||||
|
||||
EXPECT_EQ(NumBodiesGenerated, 1U);
|
||||
EXPECT_EQ(NumPrivatizedVars, 1U);
|
||||
@ -777,10 +779,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
|
||||
unsigned NumPrivatizedVars = 0;
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumBodiesGenerated;
|
||||
|
||||
Builder.restoreIP(AllocaIP);
|
||||
Builder.restoreIP(AllocIP);
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
Builder.CreateStore(F->arg_begin(), PrivAI);
|
||||
|
||||
@ -828,8 +831,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
|
||||
nullptr, nullptr, OMP_PROC_BIND_default, false));
|
||||
EXPECT_EQ(NumBodiesGenerated, 1U);
|
||||
EXPECT_EQ(NumPrivatizedVars, 1U);
|
||||
EXPECT_EQ(NumFinalizationPoints, 1U);
|
||||
@ -885,7 +888,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
|
||||
unsigned NumOuterBodiesGenerated = 0;
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumInnerBodiesGenerated;
|
||||
return Error::success();
|
||||
};
|
||||
@ -908,7 +912,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumOuterBodiesGenerated;
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
BasicBlock *CGBB = CodeGenIP.getBlock();
|
||||
@ -917,7 +922,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
|
||||
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
|
||||
OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {},
|
||||
InnerBodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
|
||||
@ -929,7 +934,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
|
||||
Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB),
|
||||
PrivCB, FiniCB, nullptr, nullptr,
|
||||
OMP_PROC_BIND_default, false));
|
||||
|
||||
@ -986,7 +991,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
|
||||
unsigned NumOuterBodiesGenerated = 0;
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumInnerBodiesGenerated;
|
||||
return Error::success();
|
||||
};
|
||||
@ -1009,7 +1015,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumOuterBodiesGenerated;
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
BasicBlock *CGBB = CodeGenIP.getBlock();
|
||||
@ -1022,18 +1029,18 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
|
||||
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP1,
|
||||
OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
|
||||
OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {},
|
||||
InnerBodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
|
||||
Builder.restoreIP(AfterIP1);
|
||||
Builder.CreateBr(NewBB1);
|
||||
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2,
|
||||
OMPBuilder.createParallel(
|
||||
InsertPointTy(NewBB1, NewBB1->end()), AllocaIP,
|
||||
InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
|
||||
OMP_PROC_BIND_default, false));
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP2,
|
||||
OMPBuilder.createParallel(InsertPointTy(NewBB1, NewBB1->end()), AllocIP,
|
||||
{}, InnerBodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
|
||||
Builder.restoreIP(AfterIP2);
|
||||
Builder.CreateBr(NewBB2);
|
||||
@ -1043,7 +1050,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
|
||||
Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB),
|
||||
PrivCB, FiniCB, nullptr, nullptr,
|
||||
OMP_PROC_BIND_default, false));
|
||||
|
||||
@ -1107,10 +1114,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
|
||||
unsigned NumPrivatizedVars = 0;
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumBodiesGenerated;
|
||||
|
||||
Builder.restoreIP(AllocaIP);
|
||||
Builder.restoreIP(AllocIP);
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
Builder.CreateStore(F->arg_begin(), PrivAI);
|
||||
|
||||
@ -1159,7 +1167,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
|
||||
OMPBuilder.createParallel(Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
|
||||
Builder.CreateIsNotNull(F->arg_begin()),
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
|
||||
@ -1214,7 +1222,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
|
||||
unsigned NumFinalizationPoints = 0;
|
||||
|
||||
CallInst *CheckedBarrier = nullptr;
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumBodiesGenerated;
|
||||
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
@ -1282,11 +1291,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
|
||||
|
||||
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB,
|
||||
FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
|
||||
nullptr, OMP_PROC_BIND_default, true));
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(Loc, AllocaIP, {}, BODYGENCB_WRAPPER(BodyGenCB),
|
||||
PrivCB, FiniCB,
|
||||
Builder.CreateIsNotNull(F->arg_begin()),
|
||||
nullptr, OMP_PROC_BIND_default, true));
|
||||
|
||||
EXPECT_EQ(NumBodiesGenerated, 1U);
|
||||
EXPECT_EQ(NumPrivatizedVars, 0U);
|
||||
@ -1351,7 +1361,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
|
||||
Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
|
||||
|
||||
Instruction *Internal;
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
IRBuilder<>::InsertPointGuard Guard(Builder);
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Internal = Builder.CreateCall(TakeI32Func, I32Val);
|
||||
@ -1371,8 +1382,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
|
||||
F->getEntryBlock().getFirstInsertionPt());
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
|
||||
nullptr, OMP_PROC_BIND_default, false));
|
||||
Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
|
||||
nullptr, nullptr, OMP_PROC_BIND_default, false));
|
||||
Builder.restoreIP(AfterIP);
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
@ -2875,9 +2886,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) {
|
||||
BasicBlock *EntryBB = nullptr;
|
||||
BasicBlock *ThenBB = nullptr;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
if (AllocaIP.isSet())
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
if (AllocIP.isSet())
|
||||
Builder.restoreIP(AllocIP);
|
||||
else
|
||||
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
@ -2956,9 +2968,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
|
||||
BasicBlock *EntryBB = nullptr;
|
||||
BasicBlock *ThenBB = nullptr;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
if (AllocaIP.isSet())
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
if (AllocIP.isSet())
|
||||
Builder.restoreIP(AllocIP);
|
||||
else
|
||||
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
@ -3035,7 +3048,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
|
||||
|
||||
AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
// actual start for bodyCB
|
||||
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
|
||||
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
|
||||
@ -3286,7 +3300,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
|
||||
AllocaInst *PrivAI =
|
||||
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
|
||||
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
|
||||
EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
|
||||
@ -3360,7 +3375,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
|
||||
AllocaInst *PrivAI =
|
||||
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
|
||||
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
|
||||
EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
|
||||
@ -3467,9 +3483,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) {
|
||||
BasicBlock *EntryBB = nullptr;
|
||||
BasicBlock *ThenBB = nullptr;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
if (AllocaIP.isSet())
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
if (AllocIP.isSet())
|
||||
Builder.restoreIP(AllocIP);
|
||||
else
|
||||
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
@ -3560,9 +3577,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
|
||||
BasicBlock *EntryBB = nullptr;
|
||||
BasicBlock *ThenBB = nullptr;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
if (AllocaIP.isSet())
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
if (AllocIP.isSet())
|
||||
Builder.restoreIP(AllocIP);
|
||||
else
|
||||
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
@ -3681,9 +3699,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
|
||||
Function *CopyFunc =
|
||||
Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
if (AllocaIP.isSet())
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
if (AllocIP.isSet())
|
||||
Builder.restoreIP(AllocIP);
|
||||
else
|
||||
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
|
||||
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
|
||||
@ -4545,8 +4564,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) {
|
||||
AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
|
||||
Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(AllocIP);
|
||||
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
|
||||
"bodygen.alloca128");
|
||||
|
||||
@ -4626,7 +4646,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
|
||||
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
|
||||
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -4682,7 +4703,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
|
||||
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
|
||||
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -4744,7 +4766,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
|
||||
Value *NumTeamsUpper =
|
||||
Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -4811,7 +4834,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
|
||||
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
|
||||
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -4868,7 +4892,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
|
||||
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
|
||||
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -4935,7 +4960,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
|
||||
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
|
||||
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
Builder.CreateCall(FakeFunction, {});
|
||||
return Error::success();
|
||||
@ -5153,7 +5179,8 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
|
||||
// xor of thread-id;
|
||||
// and store the result in global variables.
|
||||
InsertPointTy BodyIP, BodyAllocaIP;
|
||||
auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
IRBuilderBase::InsertPointGuard Guard(Builder);
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
|
||||
@ -5171,7 +5198,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
|
||||
Builder.CreateStore(Xor, XorReduced);
|
||||
|
||||
BodyIP = Builder.saveIP();
|
||||
BodyAllocaIP = InnerAllocaIP;
|
||||
BodyAllocaIP = InnerAllocIP;
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
@ -5207,12 +5234,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
|
||||
// Do nothing in finalization.
|
||||
auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
|
||||
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
|
||||
/* IfCondition */ nullptr,
|
||||
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
|
||||
/* IsCancellable */ false));
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createParallel(
|
||||
Loc, OuterAllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
|
||||
/* IfCondition */ nullptr,
|
||||
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
|
||||
/* IsCancellable */ false));
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
|
||||
@ -5531,8 +5558,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
|
||||
Builder.CreateStore(Builder.getInt32(1), XorReduced);
|
||||
|
||||
InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
|
||||
auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto FirstBodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
IRBuilderBase::InsertPointGuard Guard(Builder);
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
|
||||
@ -5547,13 +5574,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
|
||||
Builder.CreateStore(Sum, SumReduced);
|
||||
|
||||
FirstBodyIP = Builder.saveIP();
|
||||
FirstBodyAllocaIP = InnerAllocaIP;
|
||||
FirstBodyAllocaIP = InnerAllocIP;
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
|
||||
auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
|
||||
InsertPointTy CodeGenIP) {
|
||||
auto SecondBodyGenCB = [&](InsertPointTy InnerAllocIP,
|
||||
InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
IRBuilderBase::InsertPointGuard Guard(Builder);
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
|
||||
@ -5566,7 +5594,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
|
||||
Builder.CreateStore(Xor, XorReduced);
|
||||
|
||||
SecondBodyIP = Builder.saveIP();
|
||||
SecondBodyAllocaIP = InnerAllocaIP;
|
||||
SecondBodyAllocaIP = InnerAllocIP;
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
@ -5606,14 +5634,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
|
||||
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP1,
|
||||
OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
|
||||
OMPBuilder.createParallel(Loc, OuterAllocaIP, {}, FirstBodyGenCB, PrivCB,
|
||||
FiniCB, /* IfCondition */ nullptr,
|
||||
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
|
||||
/* IsCancellable */ false));
|
||||
Builder.restoreIP(AfterIP1);
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP2,
|
||||
OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP,
|
||||
OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, {},
|
||||
SecondBodyGenCB, PrivCB, FiniCB,
|
||||
/* IfCondition */ nullptr,
|
||||
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
|
||||
@ -5707,7 +5735,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
|
||||
llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
|
||||
|
||||
auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
|
||||
auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
SectionCBVector.push_back(SectionCB);
|
||||
@ -5752,7 +5781,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) {
|
||||
EXPECT_NE(IPBB->end(), IP.getPoint());
|
||||
};
|
||||
|
||||
auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
++NumBodiesGenerated;
|
||||
CaseBBs.push_back(CodeGenIP.getBlock());
|
||||
SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
|
||||
@ -6092,7 +6122,7 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTargetData(
|
||||
Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
|
||||
Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID),
|
||||
/* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc));
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
@ -6155,7 +6185,7 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTargetData(
|
||||
Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
|
||||
Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID),
|
||||
/* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc));
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
@ -6266,7 +6296,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
|
||||
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, TargetDataIP1,
|
||||
OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
|
||||
OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {},
|
||||
Builder.getInt64(DeviceID),
|
||||
/* IfCond= */ nullptr, Info, GenMapInfoCB,
|
||||
CustomMapperCB, nullptr, BodyCB));
|
||||
@ -6295,7 +6325,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
|
||||
};
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
|
||||
OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
|
||||
OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {},
|
||||
Builder.getInt64(DeviceID),
|
||||
/* IfCond= */ nullptr, Info, GenMapInfoCB,
|
||||
CustomMapperCB, nullptr, BodyTargetCB));
|
||||
@ -6346,8 +6376,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
|
||||
|
||||
Builder.CreateStore(Builder.getInt32(10), APtr);
|
||||
Builder.CreateStore(Builder.getInt32(20), BPtr);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP,
|
||||
InsertPointTy CodeGenIP) -> InsertPointTy {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) -> InsertPointTy {
|
||||
IRBuilderBase::InsertPointGuard guard(Builder);
|
||||
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
@ -6417,10 +6447,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
|
||||
Builder.saveIP(), Info, EntryInfo, DefaultAttrs,
|
||||
RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
|
||||
GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB,
|
||||
CustomMapperCB, {}, false));
|
||||
Builder.saveIP(), {}, Info, EntryInfo,
|
||||
DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr,
|
||||
Inputs, GenMapInfoCB, BodyGenCB,
|
||||
SimpleArgAccessorCB, CustomMapperCB, {}, false));
|
||||
EXPECT_EQ(DL, Builder.getCurrentDebugLocation());
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
@ -6565,8 +6595,9 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
|
||||
};
|
||||
|
||||
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
|
||||
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP)
|
||||
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP,
|
||||
ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs)
|
||||
-> OpenMPIRBuilder::InsertPointTy {
|
||||
IRBuilderBase::InsertPointGuard guard(Builder);
|
||||
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
|
||||
@ -6591,7 +6622,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
|
||||
Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
{}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
|
||||
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB,
|
||||
{}, false));
|
||||
@ -6672,7 +6703,14 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
|
||||
Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt();
|
||||
EXPECT_EQ(Value1, Value);
|
||||
EXPECT_EQ(Value1->getNextNode(), TargetStore);
|
||||
auto *Deinit = TargetStore->getNextNode();
|
||||
|
||||
auto *TargetExitBlockBr = TargetStore->getNextNode();
|
||||
EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr));
|
||||
|
||||
auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0);
|
||||
EXPECT_EQ(TargetExitBlock->getName(), "target.exit");
|
||||
|
||||
Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt();
|
||||
EXPECT_NE(Deinit, nullptr);
|
||||
|
||||
auto *DeinitCall = dyn_cast<CallInst>(Deinit);
|
||||
@ -6719,8 +6757,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
|
||||
auto BodyGenCB = [&](InsertPointTy,
|
||||
InsertPointTy CodeGenIP) -> InsertPointTy {
|
||||
auto BodyGenCB = [&](InsertPointTy, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy>) -> InsertPointTy {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
return Builder.saveIP();
|
||||
};
|
||||
@ -6753,10 +6791,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
|
||||
Builder.saveIP(), Info, EntryInfo, DefaultAttrs,
|
||||
RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
|
||||
GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB,
|
||||
CustomMapperCB, {}));
|
||||
Builder.saveIP(), {}, Info, EntryInfo,
|
||||
DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr,
|
||||
Inputs, GenMapInfoCB, BodyGenCB,
|
||||
SimpleArgAccessorCB, CustomMapperCB, {}));
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
OMPBuilder.finalize();
|
||||
@ -6839,7 +6877,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
|
||||
|
||||
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
|
||||
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP)
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP,
|
||||
ArrayRef<OpenMPIRBuilder::InsertPointTy>)
|
||||
-> OpenMPIRBuilder::InsertPointTy {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
OutlinedFn = CodeGenIP.getBlock()->getParent();
|
||||
@ -6860,8 +6899,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
|
||||
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTarget(
|
||||
Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, Info,
|
||||
EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, {},
|
||||
Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
|
||||
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}));
|
||||
Builder.restoreIP(AfterIP);
|
||||
@ -6958,8 +6997,9 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
|
||||
llvm::Value *RaiseAlloca = nullptr;
|
||||
|
||||
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
|
||||
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP)
|
||||
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP,
|
||||
OpenMPIRBuilder::InsertPointTy CodeGenIP,
|
||||
ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs)
|
||||
-> OpenMPIRBuilder::InsertPointTy {
|
||||
IRBuilderBase::InsertPointGuard guard(Builder);
|
||||
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
|
||||
@ -6985,7 +7025,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
|
||||
Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
{}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
|
||||
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
|
||||
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB,
|
||||
{}, false));
|
||||
@ -7062,7 +7102,14 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
|
||||
EXPECT_TRUE(isa<LoadInst>(Load2));
|
||||
EXPECT_EQ(Load2, Value);
|
||||
EXPECT_EQ(Load2->getNextNode(), TargetStore);
|
||||
auto *Deinit = TargetStore->getNextNode();
|
||||
|
||||
auto *TargetExitBlockBr = TargetStore->getNextNode();
|
||||
EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr));
|
||||
|
||||
auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0);
|
||||
EXPECT_EQ(TargetExitBlock->getName(), "target.exit");
|
||||
|
||||
Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt();
|
||||
EXPECT_NE(Deinit, nullptr);
|
||||
|
||||
auto *DeinitCall = dyn_cast<CallInst>(Deinit);
|
||||
@ -7091,8 +7138,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
|
||||
Value *Val128 =
|
||||
Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(AllocIP);
|
||||
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
|
||||
"bodygen.alloca128");
|
||||
|
||||
@ -7120,7 +7168,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
BodyGenCB));
|
||||
/*DeallocIPs=*/{}, BodyGenCB));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
Builder.CreateRetVoid();
|
||||
@ -7219,7 +7267,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
|
||||
@ -7231,7 +7280,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
BodyGenCB));
|
||||
/*DeallocIPs=*/{}, BodyGenCB));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
Builder.CreateRetVoid();
|
||||
@ -7254,7 +7303,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
BasicBlock *AllocaBB = Builder.GetInsertBlock();
|
||||
@ -7265,7 +7315,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
BodyGenCB,
|
||||
/*DeallocIPs=*/{}, BodyGenCB,
|
||||
/*Tied=*/false));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
@ -7290,7 +7340,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
BasicBlock *AllocaBB = Builder.GetInsertBlock();
|
||||
@ -7308,7 +7359,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
BodyGenCB,
|
||||
/*DeallocIPs=*/{}, BodyGenCB,
|
||||
/*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
@ -7370,7 +7421,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
|
||||
@ -7381,7 +7433,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
|
||||
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
|
||||
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
|
||||
OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{},
|
||||
BodyGenCB,
|
||||
/*Tied=*/false, Final));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
@ -7428,7 +7481,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
return Error::success();
|
||||
};
|
||||
BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
|
||||
@ -7438,10 +7492,10 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
|
||||
CmpInst::Predicate::ICMP_EQ, F->getArg(0),
|
||||
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
|
||||
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
|
||||
/*Tied=*/false, /*Final=*/nullptr,
|
||||
IfCondition));
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB,
|
||||
/*Tied=*/false, /*Final=*/nullptr, IfCondition));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
Builder.CreateRetVoid();
|
||||
@ -7507,8 +7561,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
|
||||
|
||||
Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(AllocIP);
|
||||
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
|
||||
"bodygen.alloca128");
|
||||
|
||||
@ -7536,7 +7591,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTaskgroup(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {},
|
||||
BodyGenCB));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
@ -7598,14 +7653,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
|
||||
F->setName("func");
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
Builder.restoreIP(AllocaIP);
|
||||
auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(AllocIP);
|
||||
AllocaInst *Alloca32 =
|
||||
Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
|
||||
AllocaInst *Alloca64 =
|
||||
Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto TaskBodyGenCB1 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
LoadInst *LoadValue =
|
||||
Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
|
||||
@ -7614,11 +7671,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
|
||||
return Error::success();
|
||||
};
|
||||
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1,
|
||||
OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, TaskIP1,
|
||||
OMPBuilder.createTask(Loc, AllocIP, DeallocIPs, TaskBodyGenCB1));
|
||||
Builder.restoreIP(TaskIP1);
|
||||
|
||||
auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
|
||||
auto TaskBodyGenCB2 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
|
||||
ArrayRef<InsertPointTy> DeallocIPs) {
|
||||
Builder.restoreIP(CodeGenIP);
|
||||
LoadInst *LoadValue =
|
||||
Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
|
||||
@ -7627,8 +7686,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
|
||||
return Error::success();
|
||||
};
|
||||
OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
|
||||
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2,
|
||||
OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, TaskIP2,
|
||||
OMPBuilder.createTask(Loc2, AllocIP, DeallocIPs, TaskBodyGenCB2));
|
||||
Builder.restoreIP(TaskIP2);
|
||||
};
|
||||
|
||||
@ -7639,7 +7699,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
|
||||
ASSERT_EXPECTED_INIT(
|
||||
OpenMPIRBuilder::InsertPointTy, AfterIP,
|
||||
OMPBuilder.createTaskgroup(
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
|
||||
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {},
|
||||
BODYGENCB_WRAPPER(BodyGenCB)));
|
||||
Builder.restoreIP(AfterIP);
|
||||
OMPBuilder.finalize();
|
||||
|
@ -711,7 +711,8 @@ TEST(CodeExtractor, OpenMPAggregateArgs) {
|
||||
/* AssumptionCache */ nullptr,
|
||||
/* AllowVarArgs */ true,
|
||||
/* AllowAlloca */ true,
|
||||
/* AllocaBlock*/ &Func->getEntryBlock(),
|
||||
/* AllocationBlock*/ &Func->getEntryBlock(),
|
||||
/* DeallocationBlocks */ {},
|
||||
/* Suffix */ ".outlined",
|
||||
/* ArgsInZeroAddressSpace */ true);
|
||||
|
||||
|
@ -223,21 +223,19 @@ def ScheduleModifier : OpenMP_I32EnumAttr<
|
||||
def ScheduleModifierAttr : OpenMP_EnumAttr<ScheduleModifier, "sched_mod">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// target_region_flags enum.
|
||||
// target_exec_mode enum.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def TargetRegionFlagsNone : I32BitEnumAttrCaseNone<"none">;
|
||||
def TargetRegionFlagsGeneric : I32BitEnumAttrCaseBit<"generic", 0>;
|
||||
def TargetRegionFlagsSpmd : I32BitEnumAttrCaseBit<"spmd", 1>;
|
||||
def TargetRegionFlagsTripCount : I32BitEnumAttrCaseBit<"trip_count", 2>;
|
||||
def TargetExecModeBare : I32EnumAttrCase<"bare", 0>;
|
||||
def TargetExecModeGeneric : I32EnumAttrCase<"generic", 1>;
|
||||
def TargetExecModeSpmd : I32EnumAttrCase<"spmd", 2>;
|
||||
|
||||
def TargetRegionFlags : OpenMP_BitEnumAttr<
|
||||
"TargetRegionFlags",
|
||||
"target region property flags", [
|
||||
TargetRegionFlagsNone,
|
||||
TargetRegionFlagsGeneric,
|
||||
TargetRegionFlagsSpmd,
|
||||
TargetRegionFlagsTripCount
|
||||
def TargetExecMode : OpenMP_I32EnumAttr<
|
||||
"TargetExecMode",
|
||||
"target execution mode, mirroring the `OMPTgtExecModeFlags` LLVM enum", [
|
||||
TargetExecModeBare,
|
||||
TargetExecModeGeneric,
|
||||
TargetExecModeSpmd,
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1517,13 +1517,17 @@ def TargetOp : OpenMP_Op<"target", traits = [
|
||||
/// operations, the top level one will be the one captured.
|
||||
Operation *getInnermostCapturedOmpOp();
|
||||
|
||||
/// Infers the kernel type (Generic, SPMD or Generic-SPMD) based on the
|
||||
/// contents of the target region.
|
||||
/// Infers the kernel type (Bare, Generic or SPMD) based on the contents of
|
||||
/// the target region.
|
||||
///
|
||||
/// \param capturedOp result of a still valid (no modifications made to any
|
||||
/// nested operations) previous call to `getInnermostCapturedOmpOp()`.
|
||||
static ::mlir::omp::TargetRegionFlags
|
||||
getKernelExecFlags(Operation *capturedOp);
|
||||
/// \param hostEvalTripCount output argument to store whether this kernel
|
||||
/// wraps a loop whose bounds must be evaluated on the host prior to
|
||||
/// launching it.
|
||||
static ::mlir::omp::TargetExecMode
|
||||
getKernelExecFlags(Operation *capturedOp,
|
||||
bool *hostEvalTripCount = nullptr);
|
||||
}] # clausesExtraClassDeclaration;
|
||||
|
||||
let assemblyFormat = clausesAssemblyFormat # [{
|
||||
|
@ -1974,8 +1974,9 @@ LogicalResult TargetOp::verifyRegions() {
|
||||
return emitError("target containing multiple 'omp.teams' nested ops");
|
||||
|
||||
// Check that host_eval values are only used in legal ways.
|
||||
bool hostEvalTripCount;
|
||||
Operation *capturedOp = getInnermostCapturedOmpOp();
|
||||
TargetRegionFlags execFlags = getKernelExecFlags(capturedOp);
|
||||
TargetExecMode execMode = getKernelExecFlags(capturedOp, &hostEvalTripCount);
|
||||
for (Value hostEvalArg :
|
||||
cast<BlockArgOpenMPOpInterface>(getOperation()).getHostEvalBlockArgs()) {
|
||||
for (Operation *user : hostEvalArg.getUsers()) {
|
||||
@ -1990,7 +1991,7 @@ LogicalResult TargetOp::verifyRegions() {
|
||||
"and 'thread_limit' in 'omp.teams'";
|
||||
}
|
||||
if (auto parallelOp = dyn_cast<ParallelOp>(user)) {
|
||||
if (bitEnumContainsAny(execFlags, TargetRegionFlags::spmd) &&
|
||||
if (execMode == TargetExecMode::spmd &&
|
||||
parallelOp->isAncestor(capturedOp) &&
|
||||
hostEvalArg == parallelOp.getNumThreads())
|
||||
continue;
|
||||
@ -2000,8 +2001,7 @@ LogicalResult TargetOp::verifyRegions() {
|
||||
"'omp.parallel' when representing target SPMD";
|
||||
}
|
||||
if (auto loopNestOp = dyn_cast<LoopNestOp>(user)) {
|
||||
if (bitEnumContainsAny(execFlags, TargetRegionFlags::trip_count) &&
|
||||
loopNestOp.getOperation() == capturedOp &&
|
||||
if (hostEvalTripCount && loopNestOp.getOperation() == capturedOp &&
|
||||
(llvm::is_contained(loopNestOp.getLoopLowerBounds(), hostEvalArg) ||
|
||||
llvm::is_contained(loopNestOp.getLoopUpperBounds(), hostEvalArg) ||
|
||||
llvm::is_contained(loopNestOp.getLoopSteps(), hostEvalArg)))
|
||||
@ -2106,7 +2106,9 @@ Operation *TargetOp::getInnermostCapturedOmpOp() {
|
||||
});
|
||||
}
|
||||
|
||||
TargetRegionFlags TargetOp::getKernelExecFlags(Operation *capturedOp) {
|
||||
TargetExecMode TargetOp::getKernelExecFlags(Operation *capturedOp,
|
||||
bool *hostEvalTripCount) {
|
||||
// TODO: Support detection of bare kernel mode.
|
||||
// A non-null captured op is only valid if it resides inside of a TargetOp
|
||||
// and is the result of calling getInnermostCapturedOmpOp() on it.
|
||||
TargetOp targetOp =
|
||||
@ -2115,9 +2117,12 @@ TargetRegionFlags TargetOp::getKernelExecFlags(Operation *capturedOp) {
|
||||
(targetOp && targetOp.getInnermostCapturedOmpOp() == capturedOp)) &&
|
||||
"unexpected captured op");
|
||||
|
||||
if (hostEvalTripCount)
|
||||
*hostEvalTripCount = false;
|
||||
|
||||
// If it's not capturing a loop, it's a default target region.
|
||||
if (!isa_and_present<LoopNestOp>(capturedOp))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
// Get the innermost non-simd loop wrapper.
|
||||
SmallVector<LoopWrapperInterface> loopWrappers;
|
||||
@ -2130,79 +2135,59 @@ TargetRegionFlags TargetOp::getKernelExecFlags(Operation *capturedOp) {
|
||||
|
||||
auto numWrappers = std::distance(innermostWrapper, loopWrappers.end());
|
||||
if (numWrappers != 1 && numWrappers != 2)
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
// Detect target-teams-distribute-parallel-wsloop[-simd].
|
||||
if (numWrappers == 2) {
|
||||
if (!isa<WsloopOp>(innermostWrapper))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
innermostWrapper = std::next(innermostWrapper);
|
||||
if (!isa<DistributeOp>(innermostWrapper))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
Operation *parallelOp = (*innermostWrapper)->getParentOp();
|
||||
if (!isa_and_present<ParallelOp>(parallelOp))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
Operation *teamsOp = parallelOp->getParentOp();
|
||||
if (!isa_and_present<TeamsOp>(teamsOp))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
if (teamsOp->getParentOp() == targetOp.getOperation())
|
||||
return TargetRegionFlags::spmd | TargetRegionFlags::trip_count;
|
||||
if (teamsOp->getParentOp() == targetOp.getOperation()) {
|
||||
if (hostEvalTripCount)
|
||||
*hostEvalTripCount = true;
|
||||
return TargetExecMode::spmd;
|
||||
}
|
||||
}
|
||||
// Detect target-teams-distribute[-simd] and target-teams-loop.
|
||||
else if (isa<DistributeOp, LoopOp>(innermostWrapper)) {
|
||||
Operation *teamsOp = (*innermostWrapper)->getParentOp();
|
||||
if (!isa_and_present<TeamsOp>(teamsOp))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
if (teamsOp->getParentOp() != targetOp.getOperation())
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
if (hostEvalTripCount)
|
||||
*hostEvalTripCount = true;
|
||||
|
||||
if (isa<LoopOp>(innermostWrapper))
|
||||
return TargetRegionFlags::spmd | TargetRegionFlags::trip_count;
|
||||
return TargetExecMode::spmd;
|
||||
|
||||
// Find single immediately nested captured omp.parallel and add spmd flag
|
||||
// (generic-spmd case).
|
||||
//
|
||||
// TODO: This shouldn't have to be done here, as it is too easy to break.
|
||||
// The openmp-opt pass should be updated to be able to promote kernels like
|
||||
// this from "Generic" to "Generic-SPMD". However, the use of the
|
||||
// `kmpc_distribute_static_loop` family of functions produced by the
|
||||
// OMPIRBuilder for these kernels prevents that from working.
|
||||
Dialect *ompDialect = targetOp->getDialect();
|
||||
Operation *nestedCapture = findCapturedOmpOp(
|
||||
capturedOp, /*checkSingleMandatoryExec=*/false,
|
||||
[&](Operation *sibling) {
|
||||
return sibling && (ompDialect != sibling->getDialect() ||
|
||||
sibling->hasTrait<OpTrait::IsTerminator>());
|
||||
});
|
||||
|
||||
TargetRegionFlags result =
|
||||
TargetRegionFlags::generic | TargetRegionFlags::trip_count;
|
||||
|
||||
if (!nestedCapture)
|
||||
return result;
|
||||
|
||||
while (nestedCapture->getParentOp() != capturedOp)
|
||||
nestedCapture = nestedCapture->getParentOp();
|
||||
|
||||
return isa<ParallelOp>(nestedCapture) ? result | TargetRegionFlags::spmd
|
||||
: result;
|
||||
return TargetExecMode::generic;
|
||||
}
|
||||
// Detect target-parallel-wsloop[-simd].
|
||||
else if (isa<WsloopOp>(innermostWrapper)) {
|
||||
Operation *parallelOp = (*innermostWrapper)->getParentOp();
|
||||
if (!isa_and_present<ParallelOp>(parallelOp))
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
|
||||
if (parallelOp->getParentOp() == targetOp.getOperation())
|
||||
return TargetRegionFlags::spmd;
|
||||
return TargetExecMode::spmd;
|
||||
}
|
||||
|
||||
return TargetRegionFlags::generic;
|
||||
return TargetExecMode::generic;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -66,14 +66,17 @@ convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
|
||||
|
||||
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
|
||||
/// insertion points for allocas.
|
||||
class OpenMPAllocaStackFrame
|
||||
: public StateStackFrameBase<OpenMPAllocaStackFrame> {
|
||||
class OpenMPAllocStackFrame
|
||||
: public StateStackFrameBase<OpenMPAllocStackFrame> {
|
||||
public:
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocStackFrame)
|
||||
|
||||
explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
|
||||
: allocaInsertPoint(allocaIP) {}
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
|
||||
explicit OpenMPAllocStackFrame(
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocIP,
|
||||
llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs)
|
||||
: allocInsertPoint(allocIP), deallocInsertPoints(deallocIPs) {}
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint;
|
||||
llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints;
|
||||
};
|
||||
|
||||
/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
|
||||
@ -482,26 +485,33 @@ static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
|
||||
|
||||
/// Find the insertion point for allocas given the current insertion point for
|
||||
/// normal operations in the builder.
|
||||
static llvm::OpenMPIRBuilder::InsertPointTy
|
||||
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation) {
|
||||
// If there is an alloca insertion point on stack, i.e. we are in a nested
|
||||
static llvm::OpenMPIRBuilder::InsertPointTy findAllocInsertPoints(
|
||||
llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
|
||||
llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::InsertPointTy> *deallocIPs =
|
||||
nullptr) {
|
||||
// If there is an allocation insertion point on stack, i.e. we are in a nested
|
||||
// operation and a specific point was provided by some surrounding operation,
|
||||
// use it.
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
|
||||
WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
|
||||
[&](OpenMPAllocaStackFrame &frame) {
|
||||
allocaInsertPoint = frame.allocaInsertPoint;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint;
|
||||
llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints;
|
||||
WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocStackFrame>(
|
||||
[&](OpenMPAllocStackFrame &frame) {
|
||||
allocInsertPoint = frame.allocInsertPoint;
|
||||
deallocInsertPoints = frame.deallocInsertPoints;
|
||||
return WalkResult::interrupt();
|
||||
});
|
||||
// In cases with multiple levels of outlining, the tree walk might find an
|
||||
// alloca insertion point that is inside the original function while the
|
||||
// builder insertion point is inside the outlined function. We need to make
|
||||
// sure that we do not use it in those cases.
|
||||
// insertion point that is inside the original function while the builder
|
||||
// insertion point is inside the outlined function. We need to make sure that
|
||||
// we do not use it in those cases.
|
||||
if (walkResult.wasInterrupted() &&
|
||||
allocaInsertPoint.getBlock()->getParent() ==
|
||||
builder.GetInsertBlock()->getParent())
|
||||
return allocaInsertPoint;
|
||||
allocInsertPoint.getBlock()->getParent() ==
|
||||
builder.GetInsertBlock()->getParent()) {
|
||||
if (deallocIPs)
|
||||
deallocIPs->insert(deallocIPs->end(), deallocInsertPoints.begin(),
|
||||
deallocInsertPoints.end());
|
||||
return allocInsertPoint;
|
||||
}
|
||||
|
||||
// Otherwise, insert to the entry block of the surrounding function.
|
||||
// If the current IRBuilder InsertPoint is the function's entry, it cannot
|
||||
@ -509,7 +519,7 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
|
||||
// confusion. Create a new BasicBlock for the Builder and use the entry block
|
||||
// for the allocs.
|
||||
// TODO: Create a dedicated alloca BasicBlock at function creation such that
|
||||
// we do not need to move the current InertPoint here.
|
||||
// we do not need to move the current InsertPoint here.
|
||||
if (builder.GetInsertBlock() ==
|
||||
&builder.GetInsertBlock()->getParent()->getEntryBlock()) {
|
||||
assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
|
||||
@ -521,6 +531,16 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
|
||||
builder.SetInsertPoint(entryBB);
|
||||
}
|
||||
|
||||
// Collect exit blocks, which is where explicit deallocations should happen in
|
||||
// this case.
|
||||
if (deallocIPs) {
|
||||
for (llvm::BasicBlock &block : *builder.GetInsertBlock()->getParent()) {
|
||||
llvm::Instruction *terminator = block.getTerminator();
|
||||
if (isa_and_present<llvm::ReturnInst>(terminator))
|
||||
deallocIPs->emplace_back(&block, terminator->getIterator());
|
||||
}
|
||||
}
|
||||
|
||||
llvm::BasicBlock &funcEntryBlock =
|
||||
builder.GetInsertBlock()->getParent()->getEntryBlock();
|
||||
return llvm::OpenMPIRBuilder::InsertPointTy(
|
||||
@ -708,7 +728,8 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(opInst)))
|
||||
return failure();
|
||||
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
||||
auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
// MaskedOp has only one region associated with it.
|
||||
auto ®ion = maskedOp.getRegion();
|
||||
builder.restoreIP(codeGenIP);
|
||||
@ -752,7 +773,8 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(opInst)))
|
||||
return failure();
|
||||
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
||||
auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
// MasterOp has only one region associated with it.
|
||||
auto ®ion = masterOp.getRegion();
|
||||
builder.restoreIP(codeGenIP);
|
||||
@ -787,7 +809,8 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(opInst)))
|
||||
return failure();
|
||||
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
||||
auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
// CriticalOp has only one region associated with it.
|
||||
auto ®ion = cast<omp::CriticalOp>(opInst).getRegion();
|
||||
builder.restoreIP(codeGenIP);
|
||||
@ -1047,7 +1070,7 @@ convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
indexVecValues++;
|
||||
}
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
|
||||
ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
|
||||
@ -1066,7 +1089,8 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(opInst)))
|
||||
return failure();
|
||||
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
||||
auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
// OrderedOp has only one region associated with it.
|
||||
auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion();
|
||||
builder.restoreIP(codeGenIP);
|
||||
@ -1102,12 +1126,63 @@ struct DeferredStore {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Check whether allocations for the given operation might potentially have to
|
||||
/// be done in device shared memory. That means we're compiling for a offloading
|
||||
/// target, the operation is an `omp::TargetOp` or nested inside of one and that
|
||||
/// target region represents a Generic (non-SPMD) kernel.
|
||||
///
|
||||
/// This represents a necessary but not sufficient set of conditions to use
|
||||
/// device shared memory in place of regular allocas. For some variables, the
|
||||
/// associated OpenMP construct or their uses might also need to be taken into
|
||||
/// account.
|
||||
static bool
|
||||
mightAllocInDeviceSharedMemory(Operation &op,
|
||||
const llvm::OpenMPIRBuilder &ompBuilder) {
|
||||
if (!ompBuilder.Config.isTargetDevice())
|
||||
return false;
|
||||
|
||||
auto targetOp = dyn_cast<omp::TargetOp>(op);
|
||||
if (!targetOp)
|
||||
targetOp = op.getParentOfType<omp::TargetOp>();
|
||||
|
||||
return targetOp &&
|
||||
targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) ==
|
||||
omp::TargetExecMode::generic;
|
||||
}
|
||||
|
||||
/// Check whether the entry block argument representing the private copy of a
|
||||
/// variable in an OpenMP construct must be allocated in device shared memory,
|
||||
/// based on what the uses of that copy are.
|
||||
///
|
||||
/// This must only be called if a previous call to
|
||||
/// \c mightAllocInDeviceSharedMemory has already returned \c true for the
|
||||
/// operation that owns the specified block argument.
|
||||
static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) {
|
||||
Operation *parentOp = value.getOwner()->getParentOp();
|
||||
auto targetOp = dyn_cast<omp::TargetOp>(parentOp);
|
||||
if (!targetOp)
|
||||
targetOp = parentOp->getParentOfType<omp::TargetOp>();
|
||||
assert(targetOp && "expected a parent omp.target operation");
|
||||
|
||||
for (auto *user : value.getUsers()) {
|
||||
if (auto parallelOp = dyn_cast<omp::ParallelOp>(user)) {
|
||||
if (llvm::is_contained(parallelOp.getReductionVars(), value))
|
||||
return true;
|
||||
} else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
|
||||
if (parentOp->isProperAncestor(parallelOp))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Allocate space for privatized reduction variables.
|
||||
/// `deferredStores` contains information to create store operations which need
|
||||
/// to be inserted after all allocas
|
||||
template <typename T>
|
||||
static LogicalResult
|
||||
allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
|
||||
allocReductionVars(T op, ArrayRef<BlockArgument> reductionArgs,
|
||||
llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation,
|
||||
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
|
||||
@ -1119,10 +1194,14 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
|
||||
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
||||
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
|
||||
|
||||
// delay creating stores until after all allocas
|
||||
deferredStores.reserve(loop.getNumReductionVars());
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
bool useDeviceSharedMem =
|
||||
isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
|
||||
|
||||
for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
|
||||
// delay creating stores until after all allocas
|
||||
deferredStores.reserve(op.getNumReductionVars());
|
||||
|
||||
for (std::size_t i = 0; i < op.getNumReductionVars(); ++i) {
|
||||
Region &allocRegion = reductionDecls[i].getAllocRegion();
|
||||
if (isByRefs[i]) {
|
||||
if (allocRegion.empty())
|
||||
@ -1131,7 +1210,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
|
||||
SmallVector<llvm::Value *, 1> phis;
|
||||
if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
|
||||
builder, moduleTranslation, &phis)))
|
||||
return loop.emitError(
|
||||
return op.emitError(
|
||||
"failed to inline `alloc` region of `omp.declare_reduction`");
|
||||
|
||||
assert(phis.size() == 1 && "expected one allocation to be yielded");
|
||||
@ -1139,33 +1218,43 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
|
||||
|
||||
// Allocate reduction variable (which is a pointer to the real reduction
|
||||
// variable allocated in the inlined region)
|
||||
llvm::Value *var = builder.CreateAlloca(
|
||||
moduleTranslation.convertType(reductionDecls[i].getType()));
|
||||
|
||||
llvm::Type *ptrTy = builder.getPtrTy();
|
||||
llvm::Value *castVar =
|
||||
builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
|
||||
llvm::Type *varTy =
|
||||
moduleTranslation.convertType(reductionDecls[i].getType());
|
||||
llvm::Value *var;
|
||||
if (useDeviceSharedMem) {
|
||||
var = ompBuilder->createOMPAllocShared(builder, varTy);
|
||||
} else {
|
||||
var = builder.CreateAlloca(varTy);
|
||||
var = builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
|
||||
}
|
||||
|
||||
llvm::Value *castPhi =
|
||||
builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
|
||||
|
||||
deferredStores.emplace_back(castPhi, castVar);
|
||||
deferredStores.emplace_back(castPhi, var);
|
||||
|
||||
privateReductionVariables[i] = castVar;
|
||||
privateReductionVariables[i] = var;
|
||||
moduleTranslation.mapValue(reductionArgs[i], castPhi);
|
||||
reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
|
||||
reductionVariableMap.try_emplace(op.getReductionVars()[i], castPhi);
|
||||
} else {
|
||||
assert(allocRegion.empty() &&
|
||||
"allocaction is implicit for by-val reduction");
|
||||
llvm::Value *var = builder.CreateAlloca(
|
||||
moduleTranslation.convertType(reductionDecls[i].getType()));
|
||||
|
||||
llvm::Type *ptrTy = builder.getPtrTy();
|
||||
llvm::Value *castVar =
|
||||
builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
|
||||
llvm::Type *varTy =
|
||||
moduleTranslation.convertType(reductionDecls[i].getType());
|
||||
llvm::Value *var;
|
||||
if (useDeviceSharedMem) {
|
||||
var = ompBuilder->createOMPAllocShared(builder, varTy);
|
||||
} else {
|
||||
var = builder.CreateAlloca(varTy);
|
||||
var = builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
|
||||
}
|
||||
|
||||
moduleTranslation.mapValue(reductionArgs[i], castVar);
|
||||
privateReductionVariables[i] = castVar;
|
||||
reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
|
||||
moduleTranslation.mapValue(reductionArgs[i], var);
|
||||
privateReductionVariables[i] = var;
|
||||
reductionVariableMap.try_emplace(op.getReductionVars()[i], var);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1227,6 +1316,10 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
|
||||
if (op.getNumReductionVars() == 0)
|
||||
return success();
|
||||
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
bool useDeviceSharedMem =
|
||||
isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
|
||||
|
||||
llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
|
||||
auto allocaIP = llvm::IRBuilderBase::InsertPoint(
|
||||
latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
|
||||
@ -1241,8 +1334,12 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
|
||||
// TODO: remove after all users of by-ref are updated to use the alloc
|
||||
// region: Allocate reduction variable (which is a pointer to the real
|
||||
// reduction variable allocated in the inlined region)
|
||||
byRefVars[i] = builder.CreateAlloca(
|
||||
moduleTranslation.convertType(reductionDecls[i].getType()));
|
||||
llvm::Type *varTy =
|
||||
moduleTranslation.convertType(reductionDecls[i].getType());
|
||||
if (useDeviceSharedMem)
|
||||
byRefVars[i] = ompBuilder->createOMPAllocShared(builder, varTy);
|
||||
else
|
||||
byRefVars[i] = builder.CreateAlloca(varTy);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1438,10 +1535,20 @@ static LogicalResult createReductionsAndCleanup(
|
||||
[](omp::DeclareReductionOp reductionDecl) {
|
||||
return &reductionDecl.getCleanupRegion();
|
||||
});
|
||||
return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
|
||||
moduleTranslation, builder,
|
||||
"omp.reduction.cleanup");
|
||||
return success();
|
||||
LogicalResult result = inlineOmpRegionCleanup(
|
||||
reductionRegions, privateReductionVariables, moduleTranslation, builder,
|
||||
"omp.reduction.cleanup");
|
||||
|
||||
bool useDeviceSharedMem =
|
||||
isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
|
||||
if (useDeviceSharedMem) {
|
||||
for (auto [var, reductionDecl] :
|
||||
llvm::zip_equal(privateReductionVariables, reductionDecls))
|
||||
ompBuilder->createOMPFreeShared(
|
||||
builder, var, moduleTranslation.convertType(reductionDecl.getType()));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
|
||||
@ -1586,8 +1693,9 @@ initPrivateVars(llvm::IRBuilderBase &builder,
|
||||
/// Allocate and initialize delayed private variables. Returns the basic block
|
||||
/// which comes after all of these allocations. llvm::Value * for each of these
|
||||
/// private variables are populated in llvmPrivateVars.
|
||||
template <typename T>
|
||||
static llvm::Expected<llvm::BasicBlock *>
|
||||
allocatePrivateVars(llvm::IRBuilderBase &builder,
|
||||
allocatePrivateVars(T op, llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation,
|
||||
PrivateVarsInfo &privateVarsInfo,
|
||||
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
|
||||
@ -1610,6 +1718,10 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
|
||||
llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
|
||||
llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
|
||||
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
bool mightUseDeviceSharedMem =
|
||||
isa<omp::TeamsOp, omp::DistributeOp>(*op) &&
|
||||
mightAllocInDeviceSharedMemory(*op, *ompBuilder);
|
||||
unsigned int allocaAS =
|
||||
moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
|
||||
unsigned int defaultAS = moduleTranslation.getLLVMModule()
|
||||
@ -1622,11 +1734,17 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
|
||||
llvm::Type *llvmAllocType =
|
||||
moduleTranslation.convertType(privDecl.getType());
|
||||
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
|
||||
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
|
||||
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
|
||||
if (allocaAS != defaultAS)
|
||||
llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
|
||||
builder.getPtrTy(defaultAS));
|
||||
llvm::Value *llvmPrivateVar = nullptr;
|
||||
if (mightUseDeviceSharedMem &&
|
||||
mustAllocPrivateVarInDeviceSharedMemory(blockArg)) {
|
||||
llvmPrivateVar = ompBuilder->createOMPAllocShared(builder, llvmAllocType);
|
||||
} else {
|
||||
llvmPrivateVar = builder.CreateAlloca(
|
||||
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
|
||||
if (allocaAS != defaultAS)
|
||||
llvmPrivateVar = builder.CreateAddrSpaceCast(
|
||||
llvmPrivateVar, builder.getPtrTy(defaultAS));
|
||||
}
|
||||
|
||||
privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
|
||||
}
|
||||
@ -1698,24 +1816,41 @@ static LogicalResult copyFirstPrivateVars(
|
||||
return success();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static LogicalResult
|
||||
cleanupPrivateVars(llvm::IRBuilderBase &builder,
|
||||
cleanupPrivateVars(T op, llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation, Location loc,
|
||||
SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
|
||||
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
|
||||
PrivateVarsInfo &privateVarsInfo) {
|
||||
// private variable deallocation
|
||||
SmallVector<Region *> privateCleanupRegions;
|
||||
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
|
||||
llvm::transform(privateVarsInfo.privatizers,
|
||||
std::back_inserter(privateCleanupRegions),
|
||||
[](omp::PrivateClauseOp privatizer) {
|
||||
return &privatizer.getDeallocRegion();
|
||||
});
|
||||
|
||||
if (failed(inlineOmpRegionCleanup(
|
||||
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
|
||||
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
|
||||
if (failed(inlineOmpRegionCleanup(privateCleanupRegions,
|
||||
privateVarsInfo.llvmVars, moduleTranslation,
|
||||
builder, "omp.private.dealloc",
|
||||
/*shouldLoadCleanupRegionArg=*/false)))
|
||||
return mlir::emitError(loc, "failed to inline `dealloc` region of an "
|
||||
"`omp.private` op in");
|
||||
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
bool mightUseDeviceSharedMem =
|
||||
isa<omp::TeamsOp, omp::DistributeOp>(*op) &&
|
||||
mightAllocInDeviceSharedMemory(*op, *ompBuilder);
|
||||
for (auto [privDecl, llvmPrivVar, blockArg] :
|
||||
llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.llvmVars,
|
||||
privateVarsInfo.blockArgs)) {
|
||||
if (mightUseDeviceSharedMem &&
|
||||
mustAllocPrivateVarInDeviceSharedMemory(blockArg)) {
|
||||
ompBuilder->createOMPFreeShared(
|
||||
builder, llvmPrivVar,
|
||||
moduleTranslation.convertType(privDecl.getType()));
|
||||
}
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
@ -1751,7 +1886,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
SmallVector<omp::DeclareReductionOp> reductionDecls;
|
||||
collectReductionDecls(sectionsOp, reductionDecls);
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
SmallVector<llvm::Value *> privateReductionVariables(
|
||||
sectionsOp.getNumReductionVars());
|
||||
@ -1775,7 +1910,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
Region ®ion = sectionOp.getRegion();
|
||||
auto sectionCB = [§ionsOp, ®ion, &builder, &moduleTranslation](
|
||||
InsertPointTy allocaIP, InsertPointTy codeGenIP) {
|
||||
InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
ArrayRef<InsertPointTy> deallocIPs) {
|
||||
builder.restoreIP(codeGenIP);
|
||||
|
||||
// map the omp.section reduction block argument to the omp.sections block
|
||||
@ -1820,7 +1956,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
// called for variables which have destructors/finalizers.
|
||||
auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
|
||||
|
||||
allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
||||
allocaIP = findAllocInsertPoints(builder, moduleTranslation);
|
||||
bool isCancellable = constructIsCancellable(sectionsOp);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
@ -1849,7 +1985,8 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(*singleOp)))
|
||||
return failure();
|
||||
|
||||
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
||||
auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
builder.restoreIP(codegenIP);
|
||||
return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
|
||||
builder, moduleTranslation)
|
||||
@ -1932,7 +2069,7 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
|
||||
SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
|
||||
llvm::ArrayRef<bool> isByRef;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
// Only do teams reduction if there is no distribute op that captures the
|
||||
// reduction instead.
|
||||
@ -1954,9 +2091,10 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
|
||||
return failure();
|
||||
}
|
||||
|
||||
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
||||
moduleTranslation, allocaIP);
|
||||
auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
builder.restoreIP(codegenIP);
|
||||
return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
|
||||
moduleTranslation)
|
||||
@ -2213,9 +2351,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
// code outside of the outlined task region, which is what we want because
|
||||
// this way the initialization and copy regions are executed immediately while
|
||||
// the host variable data are still live.
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<InsertPointTy> deallocIPs;
|
||||
InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
|
||||
// Not using splitBB() because that requires the current block to have a
|
||||
// terminator.
|
||||
@ -2245,8 +2383,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
|
||||
// Save the alloca insertion point on ModuleTranslation stack for use in
|
||||
// nested regions.
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
||||
moduleTranslation, allocaIP);
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
|
||||
// Allocate and initialize private variables
|
||||
builder.SetInsertPoint(initBlock->getTerminator());
|
||||
@ -2310,12 +2448,12 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
// Set up for call to createTask()
|
||||
builder.SetInsertPoint(taskStartBlock);
|
||||
|
||||
auto bodyCB = [&](InsertPointTy allocaIP,
|
||||
InsertPointTy codegenIP) -> llvm::Error {
|
||||
auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
|
||||
// Save the alloca insertion point on ModuleTranslation stack for use in
|
||||
// nested regions.
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
||||
moduleTranslation, allocaIP);
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
|
||||
// translate the body of the task:
|
||||
builder.restoreIP(codegenIP);
|
||||
@ -2333,7 +2471,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
||||
llvm::Type *llvmAllocType =
|
||||
moduleTranslation.convertType(privDecl.getType());
|
||||
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
|
||||
builder.SetInsertPoint(allocIP.getBlock()->getTerminator());
|
||||
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
|
||||
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
|
||||
|
||||
@ -2382,9 +2520,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
|
||||
builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
|
||||
|
||||
if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
|
||||
privateVarsInfo.llvmVars,
|
||||
privateVarsInfo.privatizers)))
|
||||
if (failed(cleanupPrivateVars(taskOp, builder, moduleTranslation,
|
||||
taskOp.getLoc(), privateVarsInfo)))
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
|
||||
// Free heap allocated task context structure at the end of the task.
|
||||
@ -2408,7 +2545,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
moduleTranslation.getOpenMPBuilder()->createTask(
|
||||
ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
|
||||
ompLoc, allocIP, deallocIPs, bodyCB, !taskOp.getUntied(),
|
||||
moduleTranslation.lookupValue(taskOp.getFinal()),
|
||||
moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
|
||||
taskOp.getMergeable(),
|
||||
@ -2433,18 +2570,21 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
|
||||
if (failed(checkImplementationStatus(*tgOp)))
|
||||
return failure();
|
||||
|
||||
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
||||
auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) {
|
||||
builder.restoreIP(codegenIP);
|
||||
return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
|
||||
builder, moduleTranslation)
|
||||
.takeError();
|
||||
};
|
||||
|
||||
InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<InsertPointTy> deallocIPs;
|
||||
InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
|
||||
bodyCB);
|
||||
moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocIP,
|
||||
deallocIPs, bodyCB);
|
||||
|
||||
if (failed(handleError(afterIP, *tgOp)))
|
||||
return failure();
|
||||
@ -2494,14 +2634,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
SmallVector<omp::DeclareReductionOp> reductionDecls;
|
||||
collectReductionDecls(wsloopOp, reductionDecls);
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
SmallVector<llvm::Value *> privateReductionVariables(
|
||||
wsloopOp.getNumReductionVars());
|
||||
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
|
||||
builder, moduleTranslation, privateVarsInfo, allocaIP);
|
||||
wsloopOp, builder, moduleTranslation, privateVarsInfo, allocaIP);
|
||||
if (handleError(afterAllocas, opInst).failed())
|
||||
return failure();
|
||||
|
||||
@ -2627,9 +2768,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
/*isTeamsReduction=*/false)))
|
||||
return failure();
|
||||
|
||||
return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
|
||||
privateVarsInfo.llvmVars,
|
||||
privateVarsInfo.privatizers);
|
||||
return cleanupPrivateVars(wsloopOp, builder, moduleTranslation,
|
||||
wsloopOp.getLoc(), privateVarsInfo);
|
||||
}
|
||||
|
||||
/// Converts the OpenMP parallel operation to LLVM IR.
|
||||
@ -2653,10 +2793,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
opInst.getNumReductionVars());
|
||||
SmallVector<DeferredStore> deferredStores;
|
||||
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP,
|
||||
InsertPointTy codeGenIP) -> llvm::Error {
|
||||
auto bodyGenCB =
|
||||
[&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
|
||||
builder, moduleTranslation, privateVarsInfo, allocaIP);
|
||||
opInst, builder, moduleTranslation, privateVarsInfo, allocIP);
|
||||
if (handleError(afterAllocas, *opInst).failed())
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
|
||||
@ -2666,12 +2807,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
MutableArrayRef<BlockArgument> reductionArgs =
|
||||
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
|
||||
|
||||
allocaIP =
|
||||
InsertPointTy(allocaIP.getBlock(),
|
||||
allocaIP.getBlock()->getTerminator()->getIterator());
|
||||
allocIP = InsertPointTy(allocIP.getBlock(),
|
||||
allocIP.getBlock()->getTerminator()->getIterator());
|
||||
|
||||
if (failed(allocReductionVars(
|
||||
opInst, reductionArgs, builder, moduleTranslation, allocaIP,
|
||||
opInst, reductionArgs, builder, moduleTranslation, allocIP,
|
||||
reductionDecls, privateReductionVariables, reductionVariableMap,
|
||||
deferredStores, isByRef)))
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
@ -2700,8 +2840,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
// Save the alloca insertion point on ModuleTranslation stack for use in
|
||||
// nested regions.
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
||||
moduleTranslation, allocaIP);
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
|
||||
// ParallelOp has only one region associated with it.
|
||||
llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions(
|
||||
@ -2728,7 +2868,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
|
||||
ompBuilder->createReductions(
|
||||
builder.saveIP(), allocaIP, reductionInfos, isByRef,
|
||||
builder.saveIP(), allocIP, reductionInfos, isByRef,
|
||||
/*IsNoWait=*/false, /*IsTeamsReduction=*/false);
|
||||
if (!contInsertPoint)
|
||||
return contInsertPoint.takeError();
|
||||
@ -2770,9 +2910,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
return llvm::createStringError(
|
||||
"failed to inline `cleanup` region of `omp.declare_reduction`");
|
||||
|
||||
if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
|
||||
privateVarsInfo.llvmVars,
|
||||
privateVarsInfo.privatizers)))
|
||||
if (failed(cleanupPrivateVars(opInst, builder, moduleTranslation,
|
||||
opInst.getLoc(), privateVarsInfo)))
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
|
||||
builder.restoreIP(oldIP);
|
||||
@ -2790,13 +2929,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
|
||||
pbKind = getProcBindKind(*bind);
|
||||
bool isCancellable = constructIsCancellable(opInst);
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
|
||||
ifCond, numThreads, pbKind, isCancellable);
|
||||
ompBuilder->createParallel(ompLoc, allocIP, deallocIPs, bodyGenCB, privCB,
|
||||
finiCB, ifCond, numThreads, pbKind,
|
||||
isCancellable);
|
||||
|
||||
if (failed(handleError(afterIP, *opInst)))
|
||||
return failure();
|
||||
@ -2841,10 +2982,10 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
assert(isByRef.size() == simdOp.getNumReductionVars());
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
|
||||
builder, moduleTranslation, privateVarsInfo, allocaIP);
|
||||
simdOp, builder, moduleTranslation, privateVarsInfo, allocaIP);
|
||||
if (handleError(afterAllocas, opInst).failed())
|
||||
return failure();
|
||||
|
||||
@ -2958,9 +3099,8 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
"omp.reduction.cleanup")))
|
||||
return failure();
|
||||
|
||||
return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
|
||||
privateVarsInfo.llvmVars,
|
||||
privateVarsInfo.privatizers);
|
||||
return cleanupPrivateVars(simdOp, builder, moduleTranslation, simdOp.getLoc(),
|
||||
privateVarsInfo);
|
||||
}
|
||||
|
||||
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
|
||||
@ -3148,7 +3288,7 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
|
||||
@ -3175,7 +3315,7 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
|
||||
@ -3292,7 +3432,7 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
|
||||
extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
|
||||
isRemoteMemory);
|
||||
// Handle ambiguous alloca, if any.
|
||||
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
||||
auto allocaIP = findAllocInsertPoints(builder, moduleTranslation);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
|
||||
@ -3393,7 +3533,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
|
||||
extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
|
||||
isFineGrainedMemory, isRemoteMemory);
|
||||
// Handle ambiguous alloca, if any.
|
||||
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
|
||||
auto allocaIP = findAllocInsertPoints(builder, moduleTranslation);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
ompBuilder->createAtomicCapture(
|
||||
@ -4357,7 +4497,7 @@ createAlteredByCaptureMap(MapInfoData &mapData,
|
||||
if (!isPtrTy) {
|
||||
auto curInsert = builder.saveIP();
|
||||
llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation();
|
||||
builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
|
||||
builder.restoreIP(findAllocInsertPoints(builder, moduleTranslation));
|
||||
auto *memTempAlloc =
|
||||
builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
|
||||
builder.SetCurrentDebugLocation(DbgLoc);
|
||||
@ -4735,18 +4875,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
|
||||
};
|
||||
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
|
||||
if (isa<omp::TargetDataOp>(op))
|
||||
return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
|
||||
return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(),
|
||||
deallocIPs,
|
||||
builder.getInt64(deviceID), ifCond,
|
||||
info, genMapInfoCB, customMapperCB,
|
||||
/*MapperFunc=*/nullptr, bodyGenCB,
|
||||
/*DeviceAddrCB=*/nullptr);
|
||||
return ompBuilder->createTargetData(
|
||||
ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
|
||||
info, genMapInfoCB, customMapperCB, &RTLFn);
|
||||
return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(),
|
||||
deallocIPs, builder.getInt64(deviceID),
|
||||
ifCond, info, genMapInfoCB,
|
||||
customMapperCB, &RTLFn);
|
||||
}();
|
||||
|
||||
if (failed(handleError(afterIP, *op)))
|
||||
@ -4782,7 +4925,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
collectReductionDecls(teamsOp, reductionDecls);
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
findAllocInsertPoints(builder, moduleTranslation);
|
||||
|
||||
MutableArrayRef<BlockArgument> reductionArgs =
|
||||
llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
|
||||
@ -4796,19 +4939,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
}
|
||||
|
||||
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
||||
auto bodyGenCB = [&](InsertPointTy allocaIP,
|
||||
InsertPointTy codeGenIP) -> llvm::Error {
|
||||
auto bodyGenCB =
|
||||
[&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
|
||||
// Save the alloc/dealloc insertion points on ModuleTranslation stack for use in
|
||||
// nested regions.
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
|
||||
moduleTranslation, allocaIP);
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
|
||||
// DistributeOp has only one region associated with it.
|
||||
builder.restoreIP(codeGenIP);
|
||||
PrivateVarsInfo privVarsInfo(distributeOp);
|
||||
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas =
|
||||
allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
|
||||
distributeOp, builder, moduleTranslation, privVarsInfo, allocIP);
|
||||
if (handleError(afterAllocas, opInst).failed())
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
|
||||
@ -4851,7 +4995,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
findCurrentLoopInfo(moduleTranslation);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
|
||||
ompBuilder->applyWorkshareLoop(
|
||||
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
|
||||
ompLoc.DL, loopInfo, allocIP, loopNeedsBarrier,
|
||||
convertToScheduleKind(schedule), chunk, isSimd,
|
||||
scheduleMod == omp::ScheduleModifier::monotonic,
|
||||
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
|
||||
@ -4861,19 +5005,19 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
return wsloopIP.takeError();
|
||||
}
|
||||
|
||||
if (failed(cleanupPrivateVars(builder, moduleTranslation,
|
||||
distributeOp.getLoc(), privVarsInfo.llvmVars,
|
||||
privVarsInfo.privatizers)))
|
||||
if (failed(cleanupPrivateVars(distributeOp, builder, moduleTranslation,
|
||||
distributeOp.getLoc(), privVarsInfo)))
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
|
||||
return llvm::Error::success();
|
||||
};
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
|
||||
ompBuilder->createDistribute(ompLoc, allocIP, deallocIPs, bodyGenCB);
|
||||
|
||||
if (failed(handleError(afterIP, opInst)))
|
||||
return failure();
|
||||
@ -4883,7 +5027,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
if (doDistributeReduction) {
|
||||
// Process the reductions if required.
|
||||
return createReductionsAndCleanup(
|
||||
teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
|
||||
teamsOp, builder, moduleTranslation, allocIP, reductionDecls,
|
||||
privateReductionVariables, isByRef,
|
||||
/*isNoWait*/ false, /*isTeamsReduction*/ true);
|
||||
}
|
||||
@ -5354,17 +5498,18 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
|
||||
}
|
||||
|
||||
// Update kernel bounds structure for the `OpenMPIRBuilder` to use.
|
||||
omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
|
||||
assert(
|
||||
omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
|
||||
omp::TargetRegionFlags::spmd) &&
|
||||
"invalid kernel flags");
|
||||
attrs.ExecFlags =
|
||||
omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
|
||||
? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
|
||||
? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
|
||||
: llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
|
||||
: llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
|
||||
omp::TargetExecMode execMode = targetOp.getKernelExecFlags(capturedOp);
|
||||
switch (execMode) {
|
||||
case omp::TargetExecMode::bare:
|
||||
attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_BARE;
|
||||
break;
|
||||
case omp::TargetExecMode::generic:
|
||||
attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_GENERIC;
|
||||
break;
|
||||
case omp::TargetExecMode::spmd:
|
||||
attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
|
||||
break;
|
||||
}
|
||||
attrs.MinTeams = minTeamsVal;
|
||||
attrs.MaxTeams.front() = maxTeamsVal;
|
||||
attrs.MinThreads = 1;
|
||||
@ -5414,8 +5559,9 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
|
||||
if (numThreads)
|
||||
attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);
|
||||
|
||||
if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
|
||||
omp::TargetRegionFlags::trip_count)) {
|
||||
bool hostEvalTripCount;
|
||||
targetOp.getKernelExecFlags(capturedOp, &hostEvalTripCount);
|
||||
if (hostEvalTripCount) {
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
attrs.LoopTripCount = nullptr;
|
||||
|
||||
@ -5551,7 +5697,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
}
|
||||
|
||||
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
||||
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
|
||||
auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
|
||||
ArrayRef<InsertPointTy> deallocIPs)
|
||||
-> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
|
||||
llvm::IRBuilderBase::InsertPointGuard guard(builder);
|
||||
builder.SetCurrentDebugLocation(llvm::DebugLoc());
|
||||
@ -5592,8 +5739,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
PrivateVarsInfo privateVarsInfo(targetOp);
|
||||
|
||||
llvm::Expected<llvm::BasicBlock *> afterAllocas =
|
||||
allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
|
||||
allocaIP, &mappedPrivateVars);
|
||||
allocatePrivateVars(targetOp, builder, moduleTranslation,
|
||||
privateVarsInfo, allocIP, &mappedPrivateVars);
|
||||
|
||||
if (failed(handleError(afterAllocas, *targetOp)))
|
||||
return llvm::make_error<PreviouslyReportedError>();
|
||||
@ -5618,6 +5765,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
return &privatizer.getDeallocRegion();
|
||||
});
|
||||
|
||||
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
|
||||
moduleTranslation, allocIP, deallocIPs);
|
||||
llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions(
|
||||
targetRegion, "omp.target", builder, moduleTranslation);
|
||||
|
||||
@ -5721,8 +5870,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
|
||||
moduleTranslation, dds);
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocIP =
|
||||
findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
|
||||
llvm::OpenMPIRBuilder::TargetDataInfo info(
|
||||
@ -5744,9 +5894,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
|
||||
moduleTranslation.getOpenMPBuilder()->createTarget(
|
||||
ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
|
||||
defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
|
||||
argAccessorCB, customMapperCB, dds, targetOp.getNowait());
|
||||
ompLoc, isOffloadEntry, allocIP, builder.saveIP(), deallocIPs, info,
|
||||
entryInfo, defaultAttrs, runtimeAttrs, ifCond, kernelInput,
|
||||
genMapInfoCB, bodyCB, argAccessorCB, customMapperCB, dds,
|
||||
targetOp.getNowait());
|
||||
|
||||
if (failed(handleError(afterIP, opInst)))
|
||||
return failure();
|
||||
@ -5764,6 +5915,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
||||
static LogicalResult
|
||||
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
|
||||
llvm::OpenMPIRBuilder *ompBuilder,
|
||||
LLVM::ModuleTranslation &moduleTranslation) {
|
||||
// Amend omp.declare_target by deleting the IR of the outlined functions
|
||||
// created for target regions. They cannot be filtered out from MLIR earlier
|
||||
@ -5786,6 +5938,11 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
|
||||
moduleTranslation.lookupFunction(funcOp.getName());
|
||||
llvmFunc->dropAllReferences();
|
||||
llvmFunc->eraseFromParent();
|
||||
|
||||
// Invalidate the builder's current insertion point, as it now points to
|
||||
// a deleted block.
|
||||
ompBuilder->Builder.ClearInsertionPoint();
|
||||
ompBuilder->Builder.SetCurrentDebugLocation(llvm::DebugLoc());
|
||||
}
|
||||
}
|
||||
return success();
|
||||
@ -6230,9 +6387,12 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
|
||||
.Case("omp.declare_target",
|
||||
[&](Attribute attr) {
|
||||
if (auto declareTargetAttr =
|
||||
dyn_cast<omp::DeclareTargetAttr>(attr))
|
||||
dyn_cast<omp::DeclareTargetAttr>(attr)) {
|
||||
llvm::OpenMPIRBuilder *ompBuilder =
|
||||
moduleTranslation.getOpenMPBuilder();
|
||||
return convertDeclareTargetAttr(op, declareTargetAttr,
|
||||
moduleTranslation);
|
||||
ompBuilder, moduleTranslation);
|
||||
}
|
||||
return failure();
|
||||
})
|
||||
.Case("omp.requires",
|
||||
|
109
mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir
Normal file
109
mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir
Normal file
@ -0,0 +1,109 @@
|
||||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
|
||||
|
||||
// This test checks that, when compiling for an offloading target, device shared
|
||||
// memory will be used in place of allocas for certain private variables.
|
||||
|
||||
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
|
||||
omp.private {type = private} @privatizer : i32
|
||||
omp.declare_reduction @reduction : i32 init {
|
||||
^bb0(%arg0: i32):
|
||||
%0 = llvm.mlir.constant(0 : i32) : i32
|
||||
omp.yield(%0 : i32)
|
||||
} combiner {
|
||||
^bb0(%arg0: i32, %arg1: i32):
|
||||
%0 = llvm.add %arg0, %arg1 : i32
|
||||
omp.yield(%0 : i32)
|
||||
}
|
||||
llvm.func @main() {
|
||||
%c0 = llvm.mlir.constant(1 : i64) : i64
|
||||
%1 = llvm.alloca %c0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
|
||||
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
|
||||
%3 = llvm.alloca %c0 x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr<5>
|
||||
%4 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
|
||||
%5 = llvm.alloca %c0 x i32 {bindc_name = "z"} : (i64) -> !llvm.ptr<5>
|
||||
%6 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
|
||||
%7 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
|
||||
%8 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "y"}
|
||||
%9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "z"}
|
||||
omp.target map_entries(%7 -> %arg0, %8 -> %arg1, %9 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
|
||||
%11 = llvm.mlir.constant(10000 : i32) : i32
|
||||
%12 = llvm.mlir.constant(1 : i32) : i32
|
||||
omp.teams reduction(@reduction %arg0 -> %arg3 : !llvm.ptr) {
|
||||
omp.distribute private(@privatizer %arg1 -> %arg4, @privatizer %arg2 -> %arg5 : !llvm.ptr, !llvm.ptr) {
|
||||
omp.loop_nest (%arg6) : i32 = (%12) to (%11) inclusive step (%12) {
|
||||
llvm.store %arg6, %arg4 : i32, !llvm.ptr
|
||||
%13 = llvm.load %arg3 : !llvm.ptr -> i32
|
||||
%14 = llvm.add %13, %12 : i32
|
||||
llvm.store %14, %arg3 : i32, !llvm.ptr
|
||||
omp.parallel reduction(@reduction %arg5 -> %arg7 : !llvm.ptr) {
|
||||
%15 = llvm.load %arg4 : !llvm.ptr -> i32
|
||||
%16 = llvm.load %arg7 : !llvm.ptr -> i32
|
||||
%17 = llvm.add %15, %16 : i32
|
||||
llvm.store %17, %arg7 : i32, !llvm.ptr
|
||||
omp.terminator
|
||||
}
|
||||
omp.yield
|
||||
}
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: call i32 @__kmpc_target_init
|
||||
// CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]]
|
||||
|
||||
// CHECK: define internal void @[[OUTLINED_TARGET]]
|
||||
// CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
|
||||
// CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
|
||||
// CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]]
|
||||
// CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast)
|
||||
|
||||
// CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]:
|
||||
// CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num
|
||||
// CHECK-NEXT: call void @__kmpc_barrier
|
||||
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4)
|
||||
|
||||
// CHECK: define internal void @[[OUTLINED_TEAMS]]
|
||||
// CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
|
||||
// CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
|
||||
|
||||
// %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
|
||||
// store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8
|
||||
// %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
|
||||
// store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8
|
||||
|
||||
// CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4)
|
||||
// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4)
|
||||
// CHECK-NEXT: br label %[[EXIT_BB:.*]]
|
||||
|
||||
// CHECK: [[EXIT_BB]]:
|
||||
// CHECK-NEXT: ret void
|
||||
|
||||
// Test that we don't misidentify a private `distribute` value as being
|
||||
// located inside of a parallel region if that parallel region is not nested
|
||||
// inside of `omp.distribute`.
|
||||
omp.parallel {
|
||||
%18 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
|
||||
omp.target map_entries(%18 -> %arg0 : !llvm.ptr) {
|
||||
%19 = llvm.mlir.constant(10000 : i32) : i32
|
||||
%20 = llvm.mlir.constant(1 : i32) : i32
|
||||
omp.teams {
|
||||
omp.distribute private(@privatizer %arg0 -> %arg1 : !llvm.ptr) {
|
||||
omp.loop_nest (%arg2) : i32 = (%20) to (%19) inclusive step (%20) {
|
||||
llvm.store %arg2, %arg1 : i32, !llvm.ptr
|
||||
omp.yield
|
||||
}
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: call i32 @__kmpc_target_init
|
||||
// CHECK-NOT: call {{.*}} @__kmpc_alloc_shared
|
||||
// CHECK-NOT: call {{.*}} @__kmpc_free_shared
|
||||
|
||||
llvm.return
|
||||
}
|
||||
}
|
@ -55,22 +55,22 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
|
||||
// CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]](
|
||||
// CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) #{{[0-9]+}} {
|
||||
// CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5)
|
||||
// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
|
||||
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
|
||||
// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
|
||||
// CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr
|
||||
// CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8
|
||||
// CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
|
||||
// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1
|
||||
// CHECK: %[[TMP2:.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// CHECK: %[[TMP3:.*]] = addrspacecast ptr addrspace(5) %[[TMP2]] to ptr
|
||||
// CHECK: store ptr %[[TMP0]], ptr %[[TMP3]], align 8
|
||||
// CHECK: %[[TMP4:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
|
||||
// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP4]], -1
|
||||
// CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
|
||||
// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8
|
||||
// CHECK: %[[TMP5:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
|
||||
// CHECK: %[[STRUCTARG:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
|
||||
// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP3]], align 8
|
||||
// CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
|
||||
// CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0
|
||||
// CHECK: store ptr %[[TMP6]], ptr addrspace(5) %[[GEP_]], align 8
|
||||
// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0
|
||||
// CHECK: store ptr %[[STRUCTARG_ASCAST]], ptr %[[TMP7]], align 8
|
||||
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr null, ptr %[[TMP2]], i64 1)
|
||||
// CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
|
||||
// CHECK: store ptr %[[TMP6]], ptr %[[GEP_]], align 8
|
||||
// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP5]], i64 0, i64 0
|
||||
// CHECK: store ptr %[[STRUCTARG]], ptr %[[TMP7]], align 8
|
||||
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP5]], i64 1)
|
||||
// CHECK: call void @__kmpc_free_shared(ptr %[[STRUCTARG]], i64 8)
|
||||
// CHECK: call void @__kmpc_target_deinit()
|
||||
|
||||
// CHECK: define internal void @[[FUNC1]](
|
||||
@ -84,7 +84,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
|
||||
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
|
||||
// CHECK-SAME: ptr addrspace(1) @[[NUM_THREADS_GLOB:[0-9]+]] to ptr),
|
||||
// CHECK-SAME: i32 [[NUM_THREADS_TMP0:%.*]], i32 1, i32 156,
|
||||
// CHECK-SAME: i32 -1, ptr [[FUNC_NUM_THREADS1:@.*]], ptr null, ptr [[NUM_THREADS_TMP1:%.*]], i64 1)
|
||||
// CHECK-SAME: i32 -1, ptr @[[FUNC_NUM_THREADS1:.*]], ptr @[[FUNC2_WRAPPER:.*]], ptr [[NUM_THREADS_TMP1:%.*]], i64 1)
|
||||
|
||||
// One of the arguments of kmpc_parallel_51 function is responsible for handling if clause
|
||||
// of omp parallel construct for target region. If this argument is nonzero,
|
||||
@ -105,4 +105,23 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
|
||||
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
|
||||
// CHECK-SAME: ptr addrspace(1) {{.*}} to ptr),
|
||||
// CHECK-SAME: i32 {{.*}}, i32 %[[IFCOND_TMP4]], i32 -1,
|
||||
// CHECK-SAME: i32 -1, ptr {{.*}}, ptr null, ptr {{.*}}, i64 1)
|
||||
// CHECK-SAME: i32 -1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, i64 1)
|
||||
|
||||
// CHECK: define internal void @[[FUNC1_WRAPPER]](i16 noundef zeroext %{{.*}}, i32 noundef %[[ADDR:.*]])
|
||||
// CHECK: %[[ADDR_ALLOCA:.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK: %[[ADDR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ALLOCA]] to ptr
|
||||
// CHECK: %[[ZERO_ALLOCA:.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK: %[[ZERO_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ZERO_ALLOCA]] to ptr
|
||||
// CHECK: %[[ARGS_ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
|
||||
// CHECK: %[[ARGS_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ARGS_ALLOCA]] to ptr
|
||||
// CHECK: store i32 %[[ADDR]], ptr %[[ADDR_ASCAST]]
|
||||
// CHECK: store i32 0, ptr %[[ZERO_ASCAST]]
|
||||
// CHECK: call void @__kmpc_get_shared_variables(ptr %[[ARGS_ASCAST]])
|
||||
// CHECK: %[[LOAD_ARGS:.*]] = load ptr, ptr %[[ARGS_ASCAST]], align 8
|
||||
// CHECK: %[[FIRST_ARG:.*]] = getelementptr inbounds ptr, ptr %[[LOAD_ARGS]], i64 0
|
||||
// CHECK: %[[STRUCTARG:.*]] = load ptr, ptr %[[FIRST_ARG]], align 8
|
||||
// CHECK: call void @[[FUNC1]](ptr %[[ADDR_ASCAST]], ptr %[[ZERO_ASCAST]], ptr %[[STRUCTARG]])
|
||||
|
||||
// CHECK: define internal void @[[FUNC2_WRAPPER]](i16 noundef zeroext %{{.*}}, i32 noundef %{{.*}})
|
||||
// CHECK-NOT: define
|
||||
// CHECK: call void @[[FUNC_NUM_THREADS1]]({{.*}})
|
||||
|
@ -56,7 +56,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
|
||||
// CHECK: %[[B:.*]] = load i32, ptr %[[PTR_B]], align 4
|
||||
// CHECK: %[[C:.*]] = add i32 %[[A]], %[[B]]
|
||||
// CHECK: store i32 %[[C]], ptr %[[PTR_C]], align 4
|
||||
// CHECK: br label %[[LABEL_DEINIT:.*]]
|
||||
// CHECK: br label %[[LABEL_TARGET_EXIT:.*]]
|
||||
// CHECK: [[LABEL_TARGET_EXIT]]:
|
||||
// CHECK-NEXT: br label %[[LABEL_DEINIT:.*]]
|
||||
// CHECK: [[LABEL_DEINIT]]:
|
||||
// CHECK-NEXT: call void @__kmpc_target_deinit()
|
||||
// CHECK-NEXT: ret void
|
||||
|
@ -87,7 +87,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
|
||||
}
|
||||
}
|
||||
|
||||
// DEVICE: @[[KERNEL_NAME:.*]]_exec_mode = weak protected constant i8 [[EXEC_MODE:3]]
|
||||
// DEVICE: @[[KERNEL_NAME:.*]]_exec_mode = weak protected constant i8 [[EXEC_MODE:1]]
|
||||
// DEVICE: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[KERNEL_NAME]]_exec_mode], section "llvm.metadata"
|
||||
// DEVICE: @[[KERNEL_NAME]]_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy {
|
||||
// DEVICE-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE]], {{.*}}},
|
||||
|
@ -70,4 +70,6 @@ llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct
|
||||
// CHECK: call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]])
|
||||
// CHECK-NEXT: br label %[[CONT_BLOCK:.*]]
|
||||
// CHECK: [[CONT_BLOCK]]:
|
||||
// CHECK-NEXT: br label %[[EXIT_BLOCK:.*]]
|
||||
// CHECK: [[EXIT_BLOCK]]:
|
||||
// CHECK-NEXT: ret void
|
||||
|
130
offload/test/offloading/fortran/target-generic-loops.f90
Normal file
130
offload/test/offloading/fortran/target-generic-loops.f90
Normal file
@ -0,0 +1,130 @@
|
||||
! Offloading test for generic target regions containing different kinds of
|
||||
! loop constructs inside.
|
||||
! REQUIRES: flang, amdgpu
|
||||
|
||||
! RUN: %libomptarget-compile-fortran-run-and-check-generic
|
||||
program main
|
||||
integer :: i1, i2, n1, n2, counter
|
||||
|
||||
n1 = 100
|
||||
n2 = 50
|
||||
|
||||
counter = 0
|
||||
!$omp target map(tofrom:counter)
|
||||
!$omp teams distribute reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
!$omp end target
|
||||
|
||||
! CHECK: 1 100
|
||||
print '(I2" "I0)', 1, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target map(tofrom:counter)
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
!$omp end target
|
||||
|
||||
! CHECK: 2 200
|
||||
print '(I2" "I0)', 2, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target map(tofrom:counter)
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
!$omp end target
|
||||
|
||||
! CHECK: 3 203
|
||||
print '(I2" "I0)', 3, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target map(tofrom: counter)
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
!$omp end target
|
||||
|
||||
! CHECK: 4 102
|
||||
print '(I2" "I0)', 4, counter
|
||||
|
||||
|
||||
counter = 0
|
||||
!$omp target teams distribute reduction(+:counter)
|
||||
do i1=1, n1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
end do
|
||||
|
||||
! CHECK: 5 5000
|
||||
print '(I2" "I0)', 5, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target teams distribute reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
end do
|
||||
|
||||
! CHECK: 6 5200
|
||||
print '(I2" "I0)', 6, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target teams distribute reduction(+:counter)
|
||||
do i1=1, n1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
end do
|
||||
|
||||
! CHECK: 7 10000
|
||||
print '(I2" "I0)', 7, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target teams distribute reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i2=1, n2
|
||||
counter = counter + 1
|
||||
end do
|
||||
counter = counter + 1
|
||||
end do
|
||||
|
||||
! CHECK: 8 10300
|
||||
print '(I2" "I0)', 8, counter
|
||||
end program
|
39
offload/test/offloading/fortran/target-spmd-loops.f90
Normal file
39
offload/test/offloading/fortran/target-spmd-loops.f90
Normal file
@ -0,0 +1,39 @@
|
||||
! Offloading test for SPMD target regions containing different kinds of
|
||||
! loop constructs inside.
|
||||
! REQUIRES: flang, amdgpu
|
||||
|
||||
! RUN: %libomptarget-compile-fortran-run-and-check-generic
|
||||
program main
|
||||
integer :: i1, n1, counter
|
||||
|
||||
n1 = 100
|
||||
|
||||
counter = 0
|
||||
!$omp target parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
|
||||
! CHECK: 1 100
|
||||
print '(I2" "I0)', 1, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target map(tofrom:counter)
|
||||
!$omp parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
!$omp end target
|
||||
|
||||
! CHECK: 2 100
|
||||
print '(I2" "I0)', 2, counter
|
||||
|
||||
counter = 0
|
||||
!$omp target teams distribute parallel do reduction(+:counter)
|
||||
do i1=1, n1
|
||||
counter = counter + 1
|
||||
end do
|
||||
|
||||
! CHECK: 3 100
|
||||
print '(I2" "I0)', 3, counter
|
||||
end program
|
Loading…
x
Reference in New Issue
Block a user