
When compiling in `--hipstdpar` mode, the builtins corresponding to the standard library math functions can end up in code that is expected to execute on the accelerator (e.g. when the `std::`-prefixed functions from `<cmath>` are used). We do not have uniform handling for these in AMDGPU, and the resulting errors are quite arcane. Furthermore, the user-space changes required to work around them tend to be rather intrusive. This patch adds an additional `--hipstdpar`-specific pass which forwards the intrinsics / libcalls that result from the use of the math builtins, and which are not properly handled, to the runtime component of HIPSTDPAR. In the long run we want to stop relying on this and handle things in the compiler, but that is going to be a rather lengthy journey, which makes this medium-term escape hatch necessary. The paired change in the runtime component is here: <https://github.com/ROCm/rocThrust/pull/551>.
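For context, a minimal (hypothetical, not taken from the patch or its tests) example of the kind of user code this targets: compiled with `--hipstdpar`, the `std::transform` below is offloaded, so the `std::acosh` call executes on the accelerator, where it lowers to a libcall that is not handled uniformly today. With this pass, the resulting `acosh` call is rewritten to the `__hipstdpar_acosh_f64` forwarding declaration that the runtime component implements.

```cpp
// Hypothetical example: with --hipstdpar the parallel transform is offloaded,
// so std::acosh runs on the accelerator and lowers to an acosh libcall there.
#include <algorithm>
#include <cmath>
#include <execution>
#include <vector>

int main() {
  std::vector<double> In(1 << 20, 2.0);
  std::vector<double> Out(In.size());

  std::transform(std::execution::par_unseq, In.begin(), In.end(), Out.begin(),
                 [](double X) { return std::acosh(X); });

  return Out.front() > 1.0 ? 0 : 1; // acosh(2.0) ~ 1.317
}
```

The rewrite is purely a rename of the callee (libcalls via the `MathLibToHipStdPar` table, intrinsics via `llvm.foo.f64` → `__hipstdpar_foo_f64` style mangling), and it runs before AcceleratorCodeSelection so that the forwarding declarations are not stripped as unreachable.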
//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements three passes that enable HIP C++ Standard Parallelism
// Support:
//
// 1. AcceleratorCodeSelection (required): Given that only algorithms are
// accelerated, and that the accelerated implementation exists in the form of
// a compute kernel, we assume that only the kernel, and all functions
// reachable from it, constitute code that the user expects the accelerator
// to execute. Thus, we identify the set of all functions reachable from
// kernels, and then remove all unreachable ones. This last part is necessary
// because it is possible for code that the user did not expect to execute on
// an accelerator to contain constructs that cannot be handled by the target
// BE, which cannot be provably demonstrated to be dead code in general, and
// thus can lead to mis-compilation. The degenerate case of this is when a
// Module contains no kernels (the parent TU had no algorithm invocations fit
// for acceleration), which we handle by completely emptying said module.
// **NOTE**: The above does not handle indirectly reachable functions i.e.
// it is possible to obtain a case where the target of an indirect
// call is otherwise unreachable and thus is removed; this
// restriction is aligned with the current `-hipstdpar` limitations
// and will be relaxed in the future.
//
// 2. AllocationInterposition (required only when on-demand paging is
// unsupported): Some accelerators or operating systems might not support
// transparent on-demand paging. Thus, they would only be able to access
// memory that is allocated by an accelerator-aware mechanism. For such cases
// the user can opt into enabling allocation / deallocation interposition,
// whereby we replace calls to known allocation / deallocation functions with
// calls to runtime implemented equivalents that forward the requests to
// accelerator-aware interfaces. We also support freeing system allocated
// memory that ends up in one of the runtime equivalents, since this can
// happen if e.g. a library that was compiled without interposition returns
// an allocation that can be validly passed to `free`.
//
// 3. MathFixup (required): Some accelerators might have an incomplete
// implementation for the intrinsics used to implement some of the math
// functions in <cmath> / their corresponding libcall lowerings. Since this
// can vary quite significantly between accelerators, we replace calls to a
// set of intrinsics / lib functions known to be problematic with calls to a
// HIPSTDPAR specific forwarding layer, which gives a uniform interface for
// accelerators to implement in their own runtime components. This pass
// should run before AcceleratorCodeSelection so as to prevent the spurious
// removal of the HIPSTDPAR specific forwarding functions.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/HipStdPar/HipStdPar.h"
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Analysis/CallGraph.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/Intrinsics.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
|
|
|
#include <cassert>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
using namespace llvm;
|
|
|
|
template<typename T>
|
|
static inline void eraseFromModule(T &ToErase) {
|
|
ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
|
|
ToErase.eraseFromParent();
|
|
}
|
|
|
|
static bool checkIfSupported(GlobalVariable &G) {
  if (!G.isThreadLocal())
    return true;

  G.dropDroppableUses();

  if (!G.isConstantUsed())
    return true;

  std::string W;
  raw_string_ostream OS(W);

  OS << "Accelerator does not support the thread_local variable "
     << G.getName();

  Instruction *I = nullptr;
  SmallVector<User *> Tmp(G.users());
  SmallPtrSet<User *, 5> Visited;
  do {
    auto U = std::move(Tmp.back());
    Tmp.pop_back();

    if (!Visited.insert(U).second)
      continue;

    if (isa<Instruction>(U))
      I = cast<Instruction>(U);
    else
      Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());
  } while (!I && !Tmp.empty());

  assert(I && "thread_local global should have at least one non-constant use.");

  G.getContext().diagnose(
      DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
                                I->getDebugLoc(), DS_Error));

  return false;
}

static inline void clearModule(Module &M) { // TODO: simplify.
  while (!M.functions().empty())
    eraseFromModule(*M.begin());
  while (!M.globals().empty())
    eraseFromModule(*M.globals().begin());
  while (!M.aliases().empty())
    eraseFromModule(*M.aliases().begin());
  while (!M.ifuncs().empty())
    eraseFromModule(*M.ifuncs().begin());
}

static SmallVector<std::reference_wrapper<Use>>
collectIndirectableUses(GlobalVariable *G) {
  // We are interested only in use chains that end in an Instruction.
  SmallVector<std::reference_wrapper<Use>> Uses;

  SmallVector<std::reference_wrapper<Use>> Stack(G->use_begin(), G->use_end());
  while (!Stack.empty()) {
    Use &U = Stack.pop_back_val();
    if (isa<Instruction>(U.getUser()))
      Uses.emplace_back(U);
    else
      transform(U.getUser()->uses(), std::back_inserter(Stack),
                [](auto &&U) { return std::ref(U); });
  }

  return Uses;
}

static inline GlobalVariable *getGlobalForName(GlobalVariable *G) {
  // Create an anonymous global which stores the variable's name, which will be
  // used by the HIPSTDPAR runtime to look up the program-wide symbol.
  LLVMContext &Ctx = G->getContext();
  auto *CDS = ConstantDataArray::getString(Ctx, G->getName());

  GlobalVariable *N = G->getParent()->getOrInsertGlobal("", CDS->getType());
  N->setInitializer(CDS);
  N->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
  N->setConstant(true);

  return N;
}

static inline GlobalVariable *getIndirectionGlobal(Module *M) {
  // Create an anonymous global which stores a pointer to a pointer, which will
  // be externally initialised by the HIPSTDPAR runtime with the address of the
  // program-wide symbol.
  Type *PtrTy = PointerType::get(
      M->getContext(), M->getDataLayout().getDefaultGlobalsAddressSpace());
  GlobalVariable *NewG = M->getOrInsertGlobal("", PtrTy);

  NewG->setInitializer(PoisonValue::get(NewG->getValueType()));
  NewG->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
  NewG->setConstant(true);
  NewG->setExternallyInitialized(true);

  return NewG;
}

static Constant *
appendIndirectedGlobal(const GlobalVariable *IndirectionTable,
                       SmallVector<Constant *> &SymbolIndirections,
                       GlobalVariable *ToIndirect) {
  Module *M = ToIndirect->getParent();

  auto *InitTy = cast<StructType>(IndirectionTable->getValueType());
  auto *SymbolListTy = cast<StructType>(InitTy->getStructElementType(2));
  Type *NameTy = SymbolListTy->getElementType(0);
  Type *IndirectTy = SymbolListTy->getElementType(1);

  Constant *NameG = getGlobalForName(ToIndirect);
  Constant *IndirectG = getIndirectionGlobal(M);
  Constant *Entry = ConstantStruct::get(
      SymbolListTy, {ConstantExpr::getAddrSpaceCast(NameG, NameTy),
                     ConstantExpr::getAddrSpaceCast(IndirectG, IndirectTy)});
  SymbolIndirections.push_back(Entry);

  return IndirectG;
}

static void fillIndirectionTable(GlobalVariable *IndirectionTable,
                                 SmallVector<Constant *> Indirections) {
  Module *M = IndirectionTable->getParent();
  size_t SymCnt = Indirections.size();

  auto *InitTy = cast<StructType>(IndirectionTable->getValueType());
  Type *SymbolListTy = InitTy->getStructElementType(1);
  auto *SymbolTy = cast<StructType>(InitTy->getStructElementType(2));

  Constant *Count = ConstantInt::get(InitTy->getStructElementType(0), SymCnt);
  M->removeGlobalVariable(IndirectionTable);
  GlobalVariable *Symbols =
      M->getOrInsertGlobal("", ArrayType::get(SymbolTy, SymCnt));
  Symbols->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
  Symbols->setInitializer(
      ConstantArray::get(ArrayType::get(SymbolTy, SymCnt), {Indirections}));
  Symbols->setConstant(true);

  Constant *ASCSymbols = ConstantExpr::getAddrSpaceCast(Symbols, SymbolListTy);
  Constant *Init = ConstantStruct::get(
      InitTy, {Count, ASCSymbols, PoisonValue::get(SymbolTy)});
  M->insertGlobalVariable(IndirectionTable);
  IndirectionTable->setInitializer(Init);
}

static void replaceWithIndirectUse(const Use &U, const GlobalVariable *G,
                                   Constant *IndirectedG) {
  auto *I = cast<Instruction>(U.getUser());

  IRBuilder<> Builder(I);
  unsigned OpIdx = U.getOperandNo();
  Value *Op = I->getOperand(OpIdx);

  // We walk back up the use chain, which could be an arbitrarily long sequence
  // of constexpr AS casts, ptr-to-int and GEP instructions, until we reach the
  // indirected global.
  while (auto *CE = dyn_cast<ConstantExpr>(Op)) {
    assert((CE->getOpcode() == Instruction::GetElementPtr ||
            CE->getOpcode() == Instruction::AddrSpaceCast ||
            CE->getOpcode() == Instruction::PtrToInt) &&
           "Only GEP, ASCAST or PTRTOINT constant uses supported!");

    Instruction *NewI = Builder.Insert(CE->getAsInstruction());
    I->replaceUsesOfWith(Op, NewI);
    I = NewI;
    Op = I->getOperand(0);
    OpIdx = 0;
    Builder.SetInsertPoint(I);
  }

  assert(Op == G && "Must reach indirected global!");

  I->setOperand(OpIdx, Builder.CreateLoad(G->getType(), IndirectedG));
}

static inline bool isValidIndirectionTable(GlobalVariable *IndirectionTable) {
  std::string W;
  raw_string_ostream OS(W);

  Type *Ty = IndirectionTable->getValueType();
  bool Valid = false;

  if (!isa<StructType>(Ty)) {
    OS << "The Indirection Table must be a struct type; ";
    Ty->print(OS);
    OS << " is incorrect.\n";
  } else if (cast<StructType>(Ty)->getNumElements() != 3u) {
    OS << "The Indirection Table must have 3 elements; "
       << cast<StructType>(Ty)->getNumElements() << " is incorrect.\n";
  } else if (!isa<IntegerType>(cast<StructType>(Ty)->getStructElementType(0))) {
    OS << "The first element in the Indirection Table must be an integer; ";
    cast<StructType>(Ty)->getStructElementType(0)->print(OS);
    OS << " is incorrect.\n";
  } else if (!isa<PointerType>(cast<StructType>(Ty)->getStructElementType(1))) {
    OS << "The second element in the Indirection Table must be a pointer; ";
    cast<StructType>(Ty)->getStructElementType(1)->print(OS);
    OS << " is incorrect.\n";
  } else if (!isa<StructType>(cast<StructType>(Ty)->getStructElementType(2))) {
    OS << "The third element in the Indirection Table must be a struct type; ";
    cast<StructType>(Ty)->getStructElementType(2)->print(OS);
    OS << " is incorrect.\n";
  } else {
    Valid = true;
  }

  if (!Valid)
    IndirectionTable->getContext().diagnose(DiagnosticInfoGeneric(W, DS_Error));

  return Valid;
}

static void indirectGlobals(GlobalVariable *IndirectionTable,
                            SmallVector<GlobalVariable *> ToIndirect) {
  // We replace globals with an indirected access via a pointer that will get
  // set by the HIPSTDPAR runtime, using their accessible, program-wide unique
  // address as set by the host linker-loader.
  SmallVector<Constant *> SymbolIndirections;
  for (auto &&G : ToIndirect) {
    SmallVector<std::reference_wrapper<Use>> Uses = collectIndirectableUses(G);

    if (Uses.empty())
      continue;

    Constant *IndirectedGlobal =
        appendIndirectedGlobal(IndirectionTable, SymbolIndirections, G);

    for_each(Uses,
             [=](auto &&U) { replaceWithIndirectUse(U, G, IndirectedGlobal); });

    eraseFromModule(*G);
  }

  if (SymbolIndirections.empty())
    return;

  fillIndirectionTable(IndirectionTable, std::move(SymbolIndirections));
}

static inline void maybeHandleGlobals(Module &M) {
  unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();

  SmallVector<GlobalVariable *> ToIndirect;
  for (auto &&G : M.globals()) {
    if (!checkIfSupported(G))
      return clearModule(M);
    if (G.getAddressSpace() != GlobAS)
      continue;
    if (G.isConstant() && G.hasInitializer() && G.hasAtLeastLocalUnnamedAddr())
      continue;

    ToIndirect.push_back(&G);
  }

  if (ToIndirect.empty())
    return;

  if (auto *IT = M.getNamedGlobal("__hipstdpar_symbol_indirection_table")) {
    if (!isValidIndirectionTable(IT))
      return clearModule(M);
    return indirectGlobals(IT, std::move(ToIndirect));
  } else {
    for (auto &&G : ToIndirect) {
      // We will internalise these, so we provide a poison initialiser.
      if (!G->hasInitializer())
        G->setInitializer(PoisonValue::get(G->getValueType()));
    }
  }
}

template<unsigned N>
static inline void removeUnreachableFunctions(
    const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
  removeFromUsedLists(M, [&](Constant *C) {
    if (auto F = dyn_cast<Function>(C))
      return !Reachable.contains(F);

    return false;
  });

  SmallVector<std::reference_wrapper<Function>> ToRemove;
  copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
    return !F.isIntrinsic() && !Reachable.contains(&F);
  });

  for_each(ToRemove, eraseFromModule<Function>);
}

static inline bool isAcceleratorExecutionRoot(const Function *F) {
  if (!F)
    return false;

  return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
}

static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
  const auto Dx = F->getName().rfind("__hipstdpar_unsupported");

  if (Dx == StringRef::npos)
    return true;

  const auto N = F->getName().substr(0, Dx);

  std::string W;
  raw_string_ostream OS(W);

  if (N == "__ASM")
    OS << "Accelerator does not support the ASM block:\n"
       << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
  else
    OS << "Accelerator does not support the " << N << " function.";

  auto Caller = CB->getParent()->getParent();

  Caller->getContext().diagnose(
      DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));

  return false;
}

PreservedAnalyses
HipStdParAcceleratorCodeSelectionPass::run(Module &M,
                                           ModuleAnalysisManager &MAM) {
  auto &CGA = MAM.getResult<CallGraphAnalysis>(M);

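  // Walk the call graph from each accelerator execution root (kernel),
  // collecting every reachable function; if an unsupported callee is
  // encountered, diagnose it and bail out early.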
  SmallPtrSet<const Function *, 32> Reachable;
  for (auto &&CGN : CGA) {
    if (!isAcceleratorExecutionRoot(CGN.first))
      continue;

    Reachable.insert(CGN.first);

    SmallVector<const Function *> Tmp({CGN.first});
    do {
      auto F = std::move(Tmp.back());
      Tmp.pop_back();

      for (auto &&N : *CGA[F]) {
        if (!N.second)
          continue;
        if (!N.second->getFunction())
          continue;
        if (Reachable.contains(N.second->getFunction()))
          continue;

        if (!checkIfSupported(N.second->getFunction(),
                              dyn_cast<CallBase>(*N.first)))
          return PreservedAnalyses::none();

        Reachable.insert(N.second->getFunction());
        Tmp.push_back(N.second->getFunction());
      }
    } while (!std::empty(Tmp));
  }

  if (std::empty(Reachable))
    clearModule(M);
  else
    removeUnreachableFunctions(Reachable, M);

  maybeHandleGlobals(M);

  return PreservedAnalyses::none();
}

static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
    {"aligned_alloc", "__hipstdpar_aligned_alloc"},
    {"calloc", "__hipstdpar_calloc"},
    {"free", "__hipstdpar_free"},
    {"malloc", "__hipstdpar_malloc"},
    {"memalign", "__hipstdpar_aligned_alloc"},
    {"mmap", "__hipstdpar_mmap"},
    {"munmap", "__hipstdpar_munmap"},
    {"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
    {"realloc", "__hipstdpar_realloc"},
    {"reallocarray", "__hipstdpar_realloc_array"},
    {"_ZdaPv", "__hipstdpar_operator_delete"},
    {"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
    {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
    {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
    {"_ZdlPv", "__hipstdpar_operator_delete"},
    {"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
    {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
    {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
    {"_Znam", "__hipstdpar_operator_new"},
    {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
    {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
    {"_ZnamSt11align_val_tRKSt9nothrow_t",
     "__hipstdpar_operator_new_aligned_nothrow"},

    {"_Znwm", "__hipstdpar_operator_new"},
    {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
    {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
    {"_ZnwmSt11align_val_tRKSt9nothrow_t",
     "__hipstdpar_operator_new_aligned_nothrow"},
    {"__builtin_calloc", "__hipstdpar_calloc"},
    {"__builtin_free", "__hipstdpar_free"},
    {"__builtin_malloc", "__hipstdpar_malloc"},
    {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
    {"__builtin_operator_new", "__hipstdpar_operator_new"},
    {"__builtin_realloc", "__hipstdpar_realloc"},
    {"__libc_calloc", "__hipstdpar_calloc"},
    {"__libc_free", "__hipstdpar_free"},
    {"__libc_malloc", "__hipstdpar_malloc"},
    {"__libc_memalign", "__hipstdpar_aligned_alloc"},
    {"__libc_realloc", "__hipstdpar_realloc"}};

static constexpr std::pair<StringLiteral, StringLiteral> HiddenMap[]{
    // hidden_malloc and hidden_free are only kept for backwards compatibility /
    // legacy purposes, and we should remove them in the future
    {"__hipstdpar_hidden_malloc", "__libc_malloc"},
    {"__hipstdpar_hidden_free", "__libc_free"},
    {"__hipstdpar_hidden_memalign", "__libc_memalign"},
    {"__hipstdpar_hidden_mmap", "mmap"},
    {"__hipstdpar_hidden_munmap", "munmap"}};

PreservedAnalyses
HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
  SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
                                                        std::cend(ReplaceMap));

  for (auto &&F : M) {
    if (!F.hasName())
      continue;
    auto It = AllocReplacements.find(F.getName());
    if (It == AllocReplacements.end())
      continue;

    if (auto R = M.getFunction(It->second)) {
      F.replaceAllUsesWith(R);
    } else {
      std::string W;
      raw_string_ostream OS(W);

      OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
         << ". Tried to run the allocation interposition pass without the "
         << "replacement functions available.";

      F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
                                                        F.getSubprogram(),
                                                        DS_Warning));
    }
  }

  for (auto &&HR : HiddenMap) {
    if (auto F = M.getFunction(HR.first)) {
      auto R = M.getOrInsertFunction(HR.second, F->getFunctionType(),
                                     F->getAttributes());
      F->replaceAllUsesWith(R.getCallee());

      eraseFromModule(*F);
    }
  }

  return PreservedAnalyses::none();
}

static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
    {"acosh", "__hipstdpar_acosh_f64"},
    {"acoshf", "__hipstdpar_acosh_f32"},
    {"asinh", "__hipstdpar_asinh_f64"},
    {"asinhf", "__hipstdpar_asinh_f32"},
    {"atanh", "__hipstdpar_atanh_f64"},
    {"atanhf", "__hipstdpar_atanh_f32"},
    {"cbrt", "__hipstdpar_cbrt_f64"},
    {"cbrtf", "__hipstdpar_cbrt_f32"},
    {"erf", "__hipstdpar_erf_f64"},
    {"erff", "__hipstdpar_erf_f32"},
    {"erfc", "__hipstdpar_erfc_f64"},
    {"erfcf", "__hipstdpar_erfc_f32"},
    {"fdim", "__hipstdpar_fdim_f64"},
    {"fdimf", "__hipstdpar_fdim_f32"},
    {"expm1", "__hipstdpar_expm1_f64"},
    {"expm1f", "__hipstdpar_expm1_f32"},
    {"hypot", "__hipstdpar_hypot_f64"},
    {"hypotf", "__hipstdpar_hypot_f32"},
    {"ilogb", "__hipstdpar_ilogb_f64"},
    {"ilogbf", "__hipstdpar_ilogb_f32"},
    {"lgamma", "__hipstdpar_lgamma_f64"},
    {"lgammaf", "__hipstdpar_lgamma_f32"},
    {"log1p", "__hipstdpar_log1p_f64"},
    {"log1pf", "__hipstdpar_log1p_f32"},
    {"logb", "__hipstdpar_logb_f64"},
    {"logbf", "__hipstdpar_logb_f32"},
    {"nextafter", "__hipstdpar_nextafter_f64"},
    {"nextafterf", "__hipstdpar_nextafter_f32"},
    {"nexttoward", "__hipstdpar_nexttoward_f64"},
    {"nexttowardf", "__hipstdpar_nexttoward_f32"},
    {"remainder", "__hipstdpar_remainder_f64"},
    {"remainderf", "__hipstdpar_remainder_f32"},
    {"remquo", "__hipstdpar_remquo_f64"},
    {"remquof", "__hipstdpar_remquo_f32"},
    {"scalbln", "__hipstdpar_scalbln_f64"},
    {"scalblnf", "__hipstdpar_scalbln_f32"},
    {"scalbn", "__hipstdpar_scalbn_f64"},
    {"scalbnf", "__hipstdpar_scalbn_f32"},
    {"tgamma", "__hipstdpar_tgamma_f64"},
    {"tgammaf", "__hipstdpar_tgamma_f32"}};

PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
                                              ModuleAnalysisManager &) {
  if (M.empty())
    return PreservedAnalyses::all();

  SmallVector<std::pair<Function *, std::string>> ToReplace;
  for (auto &&F : M) {
    if (!F.hasName())
      continue;

    StringRef N = F.getName();
    Intrinsic::ID ID = F.getIntrinsicID();

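    // Three buckets: non-intrinsic libcalls looked up in MathLibToHipStdPar,
    // intrinsics that are forwarded for both f32 and f64, and intrinsics that
    // are forwarded only in their f64 form.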
    switch (ID) {
    case Intrinsic::not_intrinsic: {
      auto It =
          find_if(MathLibToHipStdPar, [&](auto &&M) { return M.first == N; });
      if (It == std::cend(MathLibToHipStdPar))
        continue;
      ToReplace.emplace_back(&F, It->second);
      break;
    }
    case Intrinsic::acos:
    case Intrinsic::asin:
    case Intrinsic::atan:
    case Intrinsic::atan2:
    case Intrinsic::cosh:
    case Intrinsic::modf:
    case Intrinsic::sinh:
    case Intrinsic::tan:
    case Intrinsic::tanh:
      break;
    default: {
      if (F.getReturnType()->isDoubleTy()) {
        switch (ID) {
        case Intrinsic::cos:
        case Intrinsic::exp:
        case Intrinsic::exp2:
        case Intrinsic::log:
        case Intrinsic::log10:
        case Intrinsic::log2:
        case Intrinsic::pow:
        case Intrinsic::sin:
          break;
        default:
          continue;
        }
        break;
      }
      continue;
    }
    }

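    // Build the forwarding name from the callee name: '.' becomes '_' and the
    // "llvm" prefix becomes "__hipstdpar" (e.g. llvm.sin.f64 becomes
    // __hipstdpar_sin_f64).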
    ToReplace.emplace_back(&F, N);
    llvm::replace(ToReplace.back().second, '.', '_');
    StringRef Prefix = "llvm";
    ToReplace.back().second.replace(0, Prefix.size(), "__hipstdpar");
  }
  for (auto &&[F, NewF] : ToReplace)
    F->replaceAllUsesWith(
        M.getOrInsertFunction(NewF, F->getFunctionType()).getCallee());

  return PreservedAnalyses::none();
}