[CGData] Global Merge Functions (#112671)
This implements a global function merging pass. Unlike traditional function merging passes that use IR comparators, this pass employs a structurally stable hash to identify similar functions while ignoring certain constant operands. These ignored constants are tracked and encoded into a stable function summary. When merging, instead of explicitly folding similar functions and their call sites, we form a merging instance by supplying different parameters via thunks. The actual size reduction occurs when identically created merging instances are folded by the linker. Currently, this pass is wired to a pre-codegen pass, enabled by the `-enable-global-merge-func` flag. In a local merging mode, the analysis and merging steps occur sequentially within a module: - `analyze`: Collects stable function hashes and tracks locations of ignored constant operands. - `finalize`: Identifies merge candidates with matching hashes and computes the set of parameters that point to different constants. - `merge`: Uses the stable function map to optimistically create a merged function. We can enable a global merging mode similar to the global function outliner (https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753/), which will perform the above steps separately. - `-codegen-data-generate`: During the first round of code generation, we analyze local merging instances and publish their summaries. - Offline using `llvm-cgdata` or at link-time, we can finalize all these merging summaries that are combined to determine parameters. - `-codegen-data-use`: During the second round of code generation, we optimistically create merging instances within each module, and finally, the linker folds identically created merging instances. Depends on #112664 This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
This commit is contained in:
parent
6e614e11df
commit
d23c5c2d65
@ -145,6 +145,9 @@ public:
|
||||
/// Accessor for the outlined hash tree: yields the pointer obtained from
/// PublishedHashTree.get().
const OutlinedHashTree *getOutlinedHashTree() {
  const OutlinedHashTree *Tree = PublishedHashTree.get();
  return Tree;
}
|
||||
/// Accessor for the stable function map: yields the pointer obtained from
/// PublishedStableFunctionMap.get().
const StableFunctionMap *getStableFunctionMap() {
  const StableFunctionMap *Map = PublishedStableFunctionMap.get();
  return Map;
}
|
||||
|
||||
/// Returns true if we should write codegen data.
|
||||
bool emitCGData() { return EmitCGData; }
|
||||
@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
|
||||
return CodeGenData::getInstance().hasOutlinedHashTree();
|
||||
}
|
||||
|
||||
inline bool hasStableFunctionMap() {
|
||||
return CodeGenData::getInstance().hasStableFunctionMap();
|
||||
}
|
||||
|
||||
/// Convenience wrapper: forwards to getOutlinedHashTree() on the object
/// returned by CodeGenData::getInstance().
inline const OutlinedHashTree *getOutlinedHashTree() {
  auto &Data = CodeGenData::getInstance();
  return Data.getOutlinedHashTree();
}
|
||||
|
||||
/// Convenience wrapper: forwards to getStableFunctionMap() on the object
/// returned by CodeGenData::getInstance().
inline const StableFunctionMap *getStableFunctionMap() {
  auto &Data = CodeGenData::getInstance();
  return Data.getStableFunctionMap();
}
|
||||
|
||||
/// Convenience wrapper: forwards to emitCGData() on the object returned by
/// CodeGenData::getInstance().
inline bool emitCGData() {
  auto &Data = CodeGenData::getInstance();
  return Data.emitCGData();
}
|
||||
|
||||
inline void
|
||||
|
@ -110,7 +110,7 @@ struct StableFunctionMap {
|
||||
size_t size(SizeType Type = UniqueHashCount) const;
|
||||
|
||||
/// Finalize the stable function map by trimming content. When \p SkipTrim is
/// true, identical index pairs and unprofitable entries are left untrimmed.
|
||||
void finalize();
|
||||
void finalize(bool SkipTrim = false);
|
||||
|
||||
private:
|
||||
/// Insert a `StableFunctionEntry` into the function map directly. This
|
||||
|
@ -49,7 +49,7 @@ struct StableFunctionMapRecord {
|
||||
void deserializeYAML(yaml::Input &YIS);
|
||||
|
||||
/// Finalize the stable function map by trimming content; \p SkipTrim is
/// forwarded unchanged to StableFunctionMap::finalize.
|
||||
void finalize() { FunctionMap->finalize(); }
|
||||
void finalize(bool SkipTrim = false) { FunctionMap->finalize(SkipTrim); }
|
||||
|
||||
/// Merge the stable function map into this one.
|
||||
void merge(const StableFunctionMapRecord &Other) {
|
||||
|
85
llvm/include/llvm/CodeGen/GlobalMergeFunctions.h
Normal file
85
llvm/include/llvm/CodeGen/GlobalMergeFunctions.h
Normal file
@ -0,0 +1,85 @@
|
||||
//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass defines the implementation of a function merging mechanism
|
||||
// that utilizes a stable function hash to track differences in constants and
|
||||
// identify potential merge candidates. The process involves two rounds:
|
||||
// 1. The first round collects stable function hashes and identifies merge
|
||||
// candidates with matching hashes. It also computes the set of parameters
|
||||
// that point to different constants during the stable function merge.
|
||||
// 2. The second round leverages this collected global function information to
|
||||
// optimistically create a merged function in each module context, ensuring
|
||||
// correct transformation.
|
||||
// Similar to the global outliner, this approach uses the linker's deduplication
|
||||
// (ICF) to fold identical merged functions, thereby reducing the final binary
|
||||
// size. The work is inspired by the concepts discussed in the following paper:
|
||||
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
|
||||
#define LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
|
||||
|
||||
#include "llvm/CGData/StableFunctionMap.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
// Operating mode selected for the function merger (stored in
// GlobalMergeFunc::MergerMode).
// NOTE(review): this enum is declared before `namespace llvm` is opened, so it
// lives in the global namespace — confirm that is intended.
enum class HashFunctionMode {
|
||||
// Default value of GlobalMergeFunc::MergerMode.
// NOTE(review): presumably the module-local analyze/finalize/merge flow —
// confirm against initializeMergerMode().
Local,
|
||||
// NOTE(review): presumably the "generate" round that publishes summaries —
// confirm. "Funcion" is misspelled; renaming would touch every user.
BuildingHashFuncion,
|
||||
// NOTE(review): presumably the "use" round that consumes a published map —
// confirm.
UsingHashFunction,
|
||||
};
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// A vector of locations (the pair of (instruction, operand) indices) reachable
|
||||
// from a parameter.
|
||||
using ParamLocs = SmallVector<IndexPair, 4>;
|
||||
// A vector of parameters
|
||||
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
|
||||
|
||||
/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
|
||||
/// using stable function hashes. It identifies and merges functions with
|
||||
/// matching hashes across modules to optimize binary size.
|
||||
class GlobalMergeFunc {
|
||||
HashFunctionMode MergerMode = HashFunctionMode::Local;
|
||||
|
||||
std::unique_ptr<StableFunctionMap> LocalFunctionMap;
|
||||
|
||||
const ModuleSummaryIndex *Index;
|
||||
|
||||
public:
|
||||
/// The suffix used to identify the merged function that parameterizes
|
||||
/// the constant values. Note that the original function, without this suffix,
|
||||
/// becomes a thunk supplying contexts to the merged function via parameters.
|
||||
static constexpr const char MergingInstanceSuffix[] = ".Tgm";
|
||||
|
||||
GlobalMergeFunc(const ModuleSummaryIndex *Index) : Index(Index) {};
|
||||
|
||||
void initializeMergerMode(const Module &M);
|
||||
|
||||
bool run(Module &M);
|
||||
|
||||
/// Analyze module to create stable function into LocalFunctionMap.
|
||||
void analyze(Module &M);
|
||||
|
||||
/// Emit LocalFunctionMap into __llvm_merge section.
|
||||
void emitFunctionMap(Module &M);
|
||||
|
||||
/// Merge functions in the module using the given function map.
|
||||
bool merge(Module &M, const StableFunctionMap *FunctionMap);
|
||||
};
|
||||
|
||||
/// Global function merging pass for new pass manager.
|
||||
struct GlobalMergeFuncPass : public PassInfoMixin<GlobalMergeFuncPass> {
|
||||
PreservedAnalyses run(Module &M, AnalysisManager<Module> &);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
#endif // LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
|
@ -507,6 +507,9 @@ namespace llvm {
|
||||
/// This pass frees the memory occupied by the MachineFunction.
|
||||
FunctionPass *createFreeMachineFunctionPass();
|
||||
|
||||
/// This pass performs merging similar functions globally.
|
||||
ModulePass *createGlobalMergeFuncPass();
|
||||
|
||||
/// This pass performs outlining on machine instructions directly before
|
||||
/// printing assembly.
|
||||
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
|
||||
|
@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
|
||||
void initializeGCMachineCodeAnalysisPass(PassRegistry &);
|
||||
void initializeGCModuleInfoPass(PassRegistry &);
|
||||
void initializeGVNLegacyPassPass(PassRegistry &);
|
||||
void initializeGlobalMergeFuncPassWrapperPass(PassRegistry &);
|
||||
void initializeGlobalMergePass(PassRegistry &);
|
||||
void initializeGlobalsAAWrapperPassPass(PassRegistry &);
|
||||
void initializeHardwareLoopsLegacyPass(PassRegistry &);
|
||||
|
@ -79,6 +79,7 @@ struct ForcePassLinking {
|
||||
(void)llvm::createDomOnlyViewerWrapperPassPass();
|
||||
(void)llvm::createDomViewerWrapperPassPass();
|
||||
(void)llvm::createAlwaysInlinerLegacyPass();
|
||||
(void)llvm::createGlobalMergeFuncPass();
|
||||
(void)llvm::createGlobalsAAWrapperPass();
|
||||
(void)llvm::createInstSimplifyLegacyPass();
|
||||
(void)llvm::createInstructionCombiningPass();
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "llvm/CodeGen/FinalizeISel.h"
|
||||
#include "llvm/CodeGen/GCMetadata.h"
|
||||
#include "llvm/CodeGen/GlobalMerge.h"
|
||||
#include "llvm/CodeGen/GlobalMergeFunctions.h"
|
||||
#include "llvm/CodeGen/IndirectBrExpand.h"
|
||||
#include "llvm/CodeGen/InterleavedAccess.h"
|
||||
#include "llvm/CodeGen/InterleavedLoadCombine.h"
|
||||
@ -713,6 +714,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses(
|
||||
// Convert conditional moves to conditional jumps when profitable.
|
||||
if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize)
|
||||
addPass(SelectOptimizePass(&TM));
|
||||
|
||||
if (Opt.EnableGlobalMergeFunc)
|
||||
addPass(GlobalMergeFuncPass());
|
||||
}
|
||||
|
||||
/// Turn exception handling constructs into something the code generators can
|
||||
|
@ -29,6 +29,7 @@ MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
|
||||
MODULE_PASS("lower-emutls", LowerEmuTLSPass())
|
||||
MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
|
||||
MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass())
|
||||
MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
|
||||
#undef MODULE_PASS
|
||||
|
||||
#ifndef FUNCTION_ANALYSIS
|
||||
|
@ -31,6 +31,7 @@ struct CGPassBuilderOption {
|
||||
bool DisableVerify = false;
|
||||
bool EnableImplicitNullChecks = false;
|
||||
bool EnableBlockPlacementStats = false;
|
||||
bool EnableGlobalMergeFunc = false;
|
||||
bool EnableMachineFunctionSplitter = false;
|
||||
bool MISchedPostRA = false;
|
||||
bool EarlyLiveIntervals = false;
|
||||
|
@ -14,11 +14,43 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CGData/StableFunctionMap.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#define DEBUG_TYPE "stable-function-map"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<unsigned>
|
||||
GlobalMergingMinMerges("global-merging-min-merges",
|
||||
cl::desc("Minimum number of similar functions with "
|
||||
"the same hash required for merging."),
|
||||
cl::init(2), cl::Hidden);
|
||||
static cl::opt<unsigned> GlobalMergingMinInstrs(
|
||||
"global-merging-min-instrs",
|
||||
cl::desc("The minimum instruction count required when merging functions."),
|
||||
cl::init(1), cl::Hidden);
|
||||
static cl::opt<unsigned> GlobalMergingMaxParams(
|
||||
"global-merging-max-params",
|
||||
cl::desc(
|
||||
"The maximum number of parameters allowed when merging functions."),
|
||||
cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
|
||||
static cl::opt<unsigned> GlobalMergingParamOverhead(
|
||||
"global-merging-param-overhead",
|
||||
cl::desc("The overhead cost associated with each parameter when merging "
|
||||
"functions."),
|
||||
cl::init(2), cl::Hidden);
|
||||
static cl::opt<unsigned>
|
||||
GlobalMergingCallOverhead("global-merging-call-overhead",
|
||||
cl::desc("The overhead cost associated with each "
|
||||
"function call when merging functions."),
|
||||
cl::init(1), cl::Hidden);
|
||||
static cl::opt<unsigned> GlobalMergingExtraThreshold(
|
||||
"global-merging-extra-threshold",
|
||||
cl::desc("An additional cost threshold that must be exceeded for merging "
|
||||
"to be considered beneficial."),
|
||||
cl::init(0), cl::Hidden);
|
||||
|
||||
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
|
||||
auto It = NameToId.find(Name);
|
||||
if (It != NameToId.end())
|
||||
@ -117,7 +149,38 @@ static void removeIdenticalIndexPair(
|
||||
SF->IndexOperandHashMap->erase(Pair);
|
||||
}
|
||||
|
||||
void StableFunctionMap::finalize() {
|
||||
static bool isProfitable(
|
||||
const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
|
||||
&SFS) {
|
||||
unsigned StableFunctionCount = SFS.size();
|
||||
if (StableFunctionCount < GlobalMergingMinMerges)
|
||||
return false;
|
||||
|
||||
unsigned InstCount = SFS[0]->InstCount;
|
||||
if (InstCount < GlobalMergingMinInstrs)
|
||||
return false;
|
||||
|
||||
unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
|
||||
if (ParamCount > GlobalMergingMaxParams)
|
||||
return false;
|
||||
|
||||
unsigned Benefit = InstCount * (StableFunctionCount - 1);
|
||||
unsigned Cost =
|
||||
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
|
||||
StableFunctionCount +
|
||||
GlobalMergingExtraThreshold;
|
||||
|
||||
bool Result = Benefit > Cost;
|
||||
LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", "
|
||||
<< "StableFunctionCount = " << StableFunctionCount
|
||||
<< ", InstCount = " << InstCount
|
||||
<< ", ParamCount = " << ParamCount
|
||||
<< ", Benefit = " << Benefit << ", Cost = " << Cost
|
||||
<< ", Result = " << (Result ? "true" : "false") << "\n");
|
||||
return Result;
|
||||
}
|
||||
|
||||
void StableFunctionMap::finalize(bool SkipTrim) {
|
||||
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
|
||||
auto &[StableHash, SFS] = *It;
|
||||
|
||||
@ -158,9 +221,15 @@ void StableFunctionMap::finalize() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (SkipTrim)
|
||||
continue;
|
||||
|
||||
// Trim the index pair that has the same operand hash across
|
||||
// stable functions.
|
||||
removeIdenticalIndexPair(SFS);
|
||||
|
||||
if (!isProfitable(SFS))
|
||||
HashToFuncs.erase(It);
|
||||
}
|
||||
|
||||
Finalized = true;
|
||||
|
@ -71,6 +71,7 @@ add_llvm_component_library(LLVMCodeGen
|
||||
GCMetadataPrinter.cpp
|
||||
GCRootLowering.cpp
|
||||
GlobalMerge.cpp
|
||||
GlobalMergeFunctions.cpp
|
||||
HardwareLoops.cpp
|
||||
IfConversion.cpp
|
||||
ImplicitNullChecks.cpp
|
||||
|
672
llvm/lib/CodeGen/GlobalMergeFunctions.cpp
Normal file
672
llvm/lib/CodeGen/GlobalMergeFunctions.cpp
Normal file
@ -0,0 +1,672 @@
|
||||
//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements the global merge function pass.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/GlobalMergeFunctions.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
|
||||
#include "llvm/CGData/CodeGenData.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/StructuralHash.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||||
|
||||
#define DEBUG_TYPE "global-merge-func"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::support;
|
||||
|
||||
static cl::opt<bool> DisableCGDataForMerging(
|
||||
"disable-cgdata-for-merging", cl::Hidden,
|
||||
cl::desc("Disable codegen data for function merging. Local "
|
||||
"merging is still enabled within a module."),
|
||||
cl::init(false));
|
||||
|
||||
STATISTIC(NumMismatchedFunctionHash,
|
||||
"Number of mismatched function hash for global merge function");
|
||||
STATISTIC(NumMismatchedInstCount,
|
||||
"Number of mismatched instruction count for global merge function");
|
||||
STATISTIC(NumMismatchedConstHash,
|
||||
"Number of mismatched const hash for global merge function");
|
||||
STATISTIC(NumMismatchedModuleId,
|
||||
"Number of mismatched Module Id for global merge function");
|
||||
STATISTIC(NumMergedFunctions,
|
||||
"Number of functions that are actually merged using function hash");
|
||||
STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
|
||||
STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
|
||||
STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
|
||||
|
||||
/// Returns true if the \OpIdx operand of \p CI is the callee operand.
|
||||
static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
|
||||
return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
|
||||
}
|
||||
|
||||
static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
|
||||
if (CI->isInlineAsm())
|
||||
return false;
|
||||
Function *Callee = CI->getCalledOperand()
|
||||
? dyn_cast_or_null<Function>(
|
||||
CI->getCalledOperand()->stripPointerCasts())
|
||||
: nullptr;
|
||||
if (Callee) {
|
||||
if (Callee->isIntrinsic())
|
||||
return false;
|
||||
auto Name = Callee->getName();
|
||||
// objc_msgSend stubs must be called, and can't have their address taken.
|
||||
if (Name.starts_with("objc_msgSend$"))
|
||||
return false;
|
||||
// Calls to dtrace probes must generate unique patchpoints.
|
||||
if (Name.starts_with("__dtrace"))
|
||||
return false;
|
||||
}
|
||||
if (isCalleeOperand(CI, OpIdx) &&
|
||||
CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) {
|
||||
// The operand is the callee and it has already been signed. Ignore this
|
||||
// because we cannot add another ptrauth bundle to the call instruction.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns true if function \p F is eligible for merging.
|
||||
bool isEligibleFunction(Function *F) {
|
||||
if (F->isDeclaration())
|
||||
return false;
|
||||
|
||||
if (F->hasFnAttribute(llvm::Attribute::NoMerge) ||
|
||||
F->hasFnAttribute(llvm::Attribute::AlwaysInline))
|
||||
return false;
|
||||
|
||||
if (F->hasAvailableExternallyLinkage())
|
||||
return false;
|
||||
|
||||
if (F->getFunctionType()->isVarArg())
|
||||
return false;
|
||||
|
||||
if (F->getCallingConv() == CallingConv::SwiftTail)
|
||||
return false;
|
||||
|
||||
// If function contains callsites with musttail, if we merge
|
||||
// it, the merged function will have the musttail callsite, but
|
||||
// the number of parameters can change, thus the parameter count
|
||||
// of the callsite will mismatch with the function itself.
|
||||
for (const BasicBlock &BB : *F) {
|
||||
for (const Instruction &I : BB) {
|
||||
const auto *CB = dyn_cast<CallBase>(&I);
|
||||
if (CB && CB->isMustTailCall())
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Load:
|
||||
case Instruction::Store:
|
||||
case Instruction::Call:
|
||||
case Instruction::Invoke:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
|
||||
assert(OpIdx < I->getNumOperands() && "Invalid operand index");
|
||||
|
||||
if (!isEligibleInstrunctionForConstantSharing(I))
|
||||
return false;
|
||||
|
||||
if (!isa<Constant>(I->getOperand(OpIdx)))
|
||||
return false;
|
||||
|
||||
if (const auto *CI = dyn_cast<CallBase>(I))
|
||||
return canParameterizeCallOperand(CI, OpIdx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Emit, through \p Builder, the instructions that convert \p V to type
/// \p DestTy and return the converted value.
///
/// Structs and arrays are converted element by element (recursively), so the
/// source and destination must have the same number of elements. Scalars use
/// inttoptr / ptrtoint when exactly one side is a pointer and the other an
/// integer, and a bitcast otherwise.
static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
  Type *SrcTy = V->getType();
  if (SrcTy->isStructTy()) {
    assert(DestTy->isStructTy());
    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
    // Every element is overwritten below, so the seed value never escapes.
    Value *Result = PoisonValue::get(DestTy);
    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
      Value *Element =
          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
                     DestTy->getStructElementType(I));

      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
    }
    return Result;
  }
  assert(!DestTy->isStructTy());
  if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) {
    auto *DestAT = dyn_cast<ArrayType>(DestTy);
    assert(DestAT);
    assert(SrcAT->getNumElements() == DestAT->getNumElements());
    // Seed with poison, matching the struct case above; every element is
    // overwritten below.
    Value *Result = PoisonValue::get(DestTy);
    for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) {
      Value *Element =
          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
                     DestAT->getElementType());

      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
    }
    return Result;
  }
  assert(!DestTy->isArrayTy());
  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
    return Builder.CreateIntToPtr(V, DestTy);
  if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
    return Builder.CreatePtrToInt(V, DestTy);
  return Builder.CreateBitCast(V, DestTy);
}
|
||||
|
||||
/// Walk every function of \p M, hash the eligible ones while ignoring the
/// operands selected by ignoreOp, and record each as a StableFunction in
/// LocalFunctionMap. Statistics counters are bumped along the way.
void GlobalMergeFunc::analyze(Module &M) {
  ++NumAnalyzedModues;
  for (Function &Func : M) {
    ++NumAnalyzedFunctions;
    if (!isEligibleFunction(&Func))
      continue;
    ++NumEligibleFunctions;

    auto HashInfo = llvm::StructuralHashWithDifferences(Func, ignoreOp);

    // Flatten the operand map into a vector, which is the
    // serialization-friendly representation StableFunction expects.
    IndexOperandHashVecType OperandHashes;
    for (auto &Entry : *HashInfo.IndexOperandHashMap)
      OperandHashes.emplace_back(Entry);

    StableFunction SF(HashInfo.FunctionHash,
                      get_stable_name(Func.getName()).str(),
                      M.getModuleIdentifier(),
                      HashInfo.IndexInstruction->size(),
                      std::move(OperandHashes));
    LocalFunctionMap->insert(SF);
  }
}
|
||||
|
||||
/// Tuple to hold function info to process merging.
|
||||
struct FuncMergeInfo {
|
||||
StableFunctionMap::StableFunctionEntry *SF;
|
||||
Function *F;
|
||||
std::unique_ptr<IndexInstrMap> IndexInstruction;
|
||||
FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
|
||||
std::unique_ptr<IndexInstrMap> IndexInstruction)
|
||||
: SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
|
||||
};
|
||||
|
||||
// Given the func info, and the parameterized locations, create and return
|
||||
// a new merged function by replacing the original constants with the new
|
||||
// parameters.
|
||||
static Function *createMergedFunction(FuncMergeInfo &FI,
|
||||
ArrayRef<Type *> ConstParamTypes,
|
||||
const ParamLocsVecTy &ParamLocsVec) {
|
||||
// Synthesize a new merged function name by appending ".Tgm" to the root
|
||||
// function's name.
|
||||
auto *MergedFunc = FI.F;
|
||||
std::string NewFunctionName =
|
||||
MergedFunc->getName().str() + GlobalMergeFunc::MergingInstanceSuffix;
|
||||
auto *M = MergedFunc->getParent();
|
||||
assert(!M->getFunction(NewFunctionName));
|
||||
|
||||
FunctionType *OrigTy = MergedFunc->getFunctionType();
|
||||
// Get the original params' types.
|
||||
SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end());
|
||||
// Append const parameter types that are passed in.
|
||||
ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
|
||||
FunctionType *FuncType = FunctionType::get(OrigTy->getReturnType(),
|
||||
ParamTypes, /*isVarArg=*/false);
|
||||
|
||||
// Declare a new function
|
||||
Function *NewFunction =
|
||||
Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName);
|
||||
if (auto *SP = MergedFunc->getSubprogram())
|
||||
NewFunction->setSubprogram(SP);
|
||||
NewFunction->copyAttributesFrom(MergedFunc);
|
||||
NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
|
||||
|
||||
NewFunction->setLinkage(GlobalValue::InternalLinkage);
|
||||
NewFunction->addFnAttr(Attribute::NoInline);
|
||||
|
||||
// Add the new function before the root function.
|
||||
M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction);
|
||||
|
||||
// Move the body of MergedFunc into the NewFunction.
|
||||
NewFunction->splice(NewFunction->begin(), MergedFunc);
|
||||
|
||||
// Update the original args by the new args.
|
||||
auto NewArgIter = NewFunction->arg_begin();
|
||||
for (Argument &OrigArg : MergedFunc->args()) {
|
||||
Argument &NewArg = *NewArgIter++;
|
||||
OrigArg.replaceAllUsesWith(&NewArg);
|
||||
}
|
||||
|
||||
// Replace the original Constants by the new args.
|
||||
unsigned NumOrigArgs = MergedFunc->arg_size();
|
||||
for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) {
|
||||
Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx);
|
||||
for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) {
|
||||
auto *Inst = FI.IndexInstruction->lookup(InstIndex);
|
||||
auto *OrigC = Inst->getOperand(OpndIndex);
|
||||
if (OrigC->getType() != NewArg->getType()) {
|
||||
IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
|
||||
Inst->setOperand(OpndIndex,
|
||||
createCast(Builder, NewArg, OrigC->getType()));
|
||||
} else {
|
||||
Inst->setOperand(OpndIndex, NewArg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NewFunction;
|
||||
}
|
||||
|
||||
// Given the original function (Thunk) and the merged function (ToFunc), create
|
||||
// a thunk to the merged function.
|
||||
static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
|
||||
Function *ToFunc) {
|
||||
auto *Thunk = FI.F;
|
||||
|
||||
assert(Thunk->arg_size() + Params.size() ==
|
||||
ToFunc->getFunctionType()->getNumParams());
|
||||
Thunk->dropAllReferences();
|
||||
|
||||
BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk);
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
SmallVector<Value *> Args;
|
||||
unsigned ParamIdx = 0;
|
||||
FunctionType *ToFuncTy = ToFunc->getFunctionType();
|
||||
|
||||
// Add arguments which are passed through Thunk.
|
||||
for (Argument &AI : Thunk->args()) {
|
||||
Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx)));
|
||||
++ParamIdx;
|
||||
}
|
||||
|
||||
// Add new arguments defined by Params.
|
||||
for (auto *Param : Params) {
|
||||
assert(ParamIdx < ToFuncTy->getNumParams());
|
||||
Args.push_back(
|
||||
createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx)));
|
||||
++ParamIdx;
|
||||
}
|
||||
|
||||
CallInst *CI = Builder.CreateCall(ToFunc, Args);
|
||||
bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail &&
|
||||
Thunk->getCallingConv() == CallingConv::SwiftTail;
|
||||
CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
|
||||
: llvm::CallInst::TCK_Tail);
|
||||
CI->setCallingConv(ToFunc->getCallingConv());
|
||||
CI->setAttributes(ToFunc->getAttributes());
|
||||
if (Thunk->getReturnType()->isVoidTy())
|
||||
Builder.CreateRetVoid();
|
||||
else
|
||||
Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType()));
|
||||
}
|
||||
|
||||
// Check if the old merged/optimized IndexOperandHashMap is compatible with
|
||||
// the current IndexOperandHashMap. An operand hash may not be stable across
|
||||
// different builds due to varying modules combined. To address this, we relax
|
||||
// the hash check condition by comparing Const hash patterns instead of absolute
|
||||
// hash values. For example, let's assume we have three Consts located at idx1,
|
||||
// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1
|
||||
// in the old merged map below:
|
||||
// Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
|
||||
// Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
|
||||
// If the current function also has three Consts in the same locations,
|
||||
// with hash sequences hash1', hash2', and hash1' where the first and third
|
||||
// are the same as the old hash sequences, we consider them matched.
|
||||
static bool checkConstHashCompatible(
|
||||
const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
|
||||
const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
|
||||
|
||||
DenseMap<stable_hash, stable_hash> OldHashToCurrHash;
|
||||
for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) {
|
||||
auto It = CurrInstOpndIndexToConstHash.find(Index);
|
||||
if (It == CurrInstOpndIndexToConstHash.end())
|
||||
return false;
|
||||
|
||||
auto CurrHash = It->second;
|
||||
auto J = OldHashToCurrHash.find(OldHash);
|
||||
if (J == OldHashToCurrHash.end())
|
||||
OldHashToCurrHash.insert({OldHash, CurrHash});
|
||||
else if (J->second != CurrHash)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Validate the locations pointed by a param has the same hash and Constant.
|
||||
static bool
|
||||
checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
|
||||
const IndexInstrMap &IndexInstruction,
|
||||
const ParamLocsVecTy &ParamLocsVec) {
|
||||
for (auto &ParamLocs : ParamLocsVec) {
|
||||
std::optional<stable_hash> OldHash;
|
||||
std::optional<Constant *> OldConst;
|
||||
for (auto &Loc : ParamLocs) {
|
||||
assert(SF.IndexOperandHashMap->count(Loc));
|
||||
auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc);
|
||||
auto [InstIndex, OpndIndex] = Loc;
|
||||
assert(InstIndex < IndexInstruction.size());
|
||||
const auto *Inst = IndexInstruction.lookup(InstIndex);
|
||||
auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex));
|
||||
if (!OldHash) {
|
||||
OldHash = CurrHash;
|
||||
OldConst = CurrConst;
|
||||
} else if (CurrConst != *OldConst || CurrHash != *OldHash) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Compute the parameters needed to merge the stable functions in SFS.
//
// For every operand location tracked by the first function, the hashes found
// at that location across all functions form a "hash sequence". Locations
// whose hash is identical in every function need no parameter. The remaining
// locations are grouped by hash sequence, and each distinct sequence becomes
// one parameter. The result holds one location list per parameter, ordered by
// the first location of each list.
static ParamLocsVecTy computeParamInfo(
    const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
        &SFS) {
  // std::vector is used as the key because SmallVector cannot be.
  std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
  auto &RSF = *SFS[0];
  unsigned StableFunctionCount = SFS.size();

  for (auto &[Loc, Hash] : *RSF.IndexOperandHashMap) {
    // Const hash sequence across stable functions.
    // We will allocate a parameter per unique hash sequence.
    std::vector<stable_hash> ConstHashSeq;
    ConstHashSeq.push_back(Hash);
    bool Identical = true;
    for (unsigned J = 1; J < StableFunctionCount; ++J) {
      auto &SF = SFS[J];
      auto SHash = SF->IndexOperandHashMap->at(Loc);
      if (Hash != SHash)
        Identical = false;
      ConstHashSeq.push_back(SHash);
    }

    if (Identical)
      continue;

    // For each unique Const hash sequence (parameter), add the locations.
    HashSeqToLocs[ConstHashSeq].push_back(Loc);
  }

  ParamLocsVecTy ParamLocsVec;
  ParamLocsVec.reserve(HashSeqToLocs.size());
  for (auto &[HashSeq, Locs] : HashSeqToLocs)
    ParamLocsVec.push_back(std::move(Locs));

  // Sort once after collecting. Each location belongs to exactly one list, so
  // the first locations are distinct and the resulting order is unique.
  llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
    return L[0] < R[0];
  });
  return ParamLocsVec;
}
|
||||
|
||||
/// Create merging instances in \p M for the functions that match entries of
/// \p FunctionMap.
///
/// For each hash in the map, the stable functions recorded under it are
/// resolved to functions of \p M by stable name. A function is accepted only
/// if it has not been merged already, is eligible, and its freshly computed
/// hash, instruction count, ignored-operand locations and constant hashes are
/// all compatible with the recorded stable function. Each accepted function
/// gets its own merged function taking the differing constants as extra
/// parameters, and the original function is turned into a thunk calling it.
///
/// \param M           Module whose functions are considered for merging.
/// \param FunctionMap Stable function map providing the merge candidates.
/// \returns true if at least one function was transformed.
bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
  bool Changed = false;

  // Build a map from stable function name to function.
  StringMap<Function *> StableNameToFuncMap;
  for (auto &F : M)
    StableNameToFuncMap[get_stable_name(F.getName())] = &F;
  // Track merged functions
  DenseSet<Function *> MergedFunctions;

  for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
    // Parameter locations based on the unique hash sequences
    // across the candidates.
    std::optional<ParamLocsVecTy> ParamLocsVec;
    // First function accepted for this hash; stays null until one is accepted.
    Function *MergedFunc = nullptr;
    std::string MergedModId;
    SmallVector<FuncMergeInfo> FuncMergeInfos;
    for (auto &SF : SFS) {
      // Get the function from the stable name.
      auto I = StableNameToFuncMap.find(
          *FunctionMap->getNameForId(SF->FunctionNameId));
      if (I == StableNameToFuncMap.end())
        continue;
      Function *F = I->second;
      assert(F);
      // Skip if the function has been merged before.
      if (MergedFunctions.count(F))
        continue;
      // Consider the function if it is eligible for merging.
      if (!isEligibleFunction(F))
        continue;

      auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
      uint64_t FuncHash = FI.FunctionHash;
      if (Hash != FuncHash) {
        ++NumMismatchedFunctionHash;
        continue;
      }

      if (SF->InstCount != FI.IndexInstruction->size()) {
        ++NumMismatchedInstCount;
        continue;
      }

      // Every recorded location must still refer to an operand that the
      // hashing ignores in the current function. Only the location (key) of
      // each map entry is needed here.
      bool HasValidSharedConst = true;
      for (auto &Entry : *SF->IndexOperandHashMap) {
        auto [InstIndex, OpndIndex] = Entry.first;
        assert(InstIndex < FI.IndexInstruction->size());
        auto *Inst = FI.IndexInstruction->lookup(InstIndex);
        if (!ignoreOp(Inst, OpndIndex)) {
          HasValidSharedConst = false;
          break;
        }
      }
      if (!HasValidSharedConst) {
        ++NumMismatchedConstHash;
        continue;
      }
      if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
                                    *FI.IndexOperandHashMap)) {
        ++NumMismatchedConstHash;
        continue;
      }
      // Compute the parameter locations lazily, once per hash, when the first
      // function reaches this point.
      if (!ParamLocsVec.has_value()) {
        ParamLocsVec = computeParamInfo(SFS);
        LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
                          << " with Params " << ParamLocsVec->size() << "\n");
      }
      if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
                                        *ParamLocsVec)) {
        ++NumMismatchedConstHash;
        continue;
      }

      if (MergedFunc) {
        // Check if the matched functions fall into the same (first) module.
        // This module check is not strictly necessary as the functions can move
        // around. We just want to avoid merging functions from different
        // modules than the first one in the function map, as they may not end
        // up with being ICFed by the linker.
        if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
          ++NumMismatchedModuleId;
          continue;
        }
      } else {
        MergedFunc = F;
        MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
      }

      FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction));
      MergedFunctions.insert(F);
    }
    unsigned FuncMergeInfoSize = FuncMergeInfos.size();
    if (FuncMergeInfoSize == 0)
      continue;

    LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
                      << FuncMergeInfoSize << " in " << M.getModuleIdentifier()
                      << "\n");

    // FuncMergeInfos is non-empty here, so ParamLocsVec has been computed.
    for (auto &FMI : FuncMergeInfos) {
      Changed = true;

      // We've already validated all locations of constant operands pointed by
      // the parameters. Populate parameters pointing to the original constants.
      SmallVector<Constant *> Params;
      SmallVector<Type *> ParamTypes;
      for (auto &Locs : *ParamLocsVec) {
        assert(!Locs.empty());
        // All locations of a group share one parameter; the first location
        // supplies its value and type.
        auto &[InstIndex, OpndIndex] = Locs[0];
        auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
        auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
        Params.push_back(Opnd);
        ParamTypes.push_back(Opnd->getType());
      }

      // Create a merged function derived from the current function.
      Function *MergedInstance =
          createMergedFunction(FMI, ParamTypes, *ParamLocsVec);

      LLVM_DEBUG({
        dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
               << ") " << MergedInstance->getName() << " generated from "
               << FMI.F->getName() << ":\n";
        MergedInstance->dump();
      });

      // Transform the current function into a thunk that calls the merged
      // function.
      createThunk(FMI, Params, MergedInstance);
      LLVM_DEBUG({
        dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
        FMI.F->dump();
      });
      ++NumMergedFunctions;
    }
  }

  return Changed;
}
|
||||
|
||||
void GlobalMergeFunc::initializeMergerMode(const Module &M) {
|
||||
// Initialize the local function map regardless of the merger mode.
|
||||
LocalFunctionMap = std::make_unique<StableFunctionMap>();
|
||||
|
||||
// Disable codegen data for merging. The local merge is still enabled.
|
||||
if (DisableCGDataForMerging)
|
||||
return;
|
||||
|
||||
// (Full)LTO module does not have functions added to the index.
|
||||
// In this case, we run a local merger without using codegen data.
|
||||
if (Index && !Index->hasExportedFunctions(M))
|
||||
return;
|
||||
|
||||
if (cgdata::emitCGData())
|
||||
MergerMode = HashFunctionMode::BuildingHashFuncion;
|
||||
else if (cgdata::hasStableFunctionMap())
|
||||
MergerMode = HashFunctionMode::UsingHashFunction;
|
||||
}
|
||||
|
||||
/// Serialize the local function map and embed it into \p M in the codegen
/// data section for merging. Nothing is embedded when the map is empty.
void GlobalMergeFunc::emitFunctionMap(Module &M) {
  LLVM_DEBUG(dbgs() << "Emit function map. Size: " << LocalFunctionMap->size()
                    << "\n");
  // An empty map has nothing worth emitting.
  if (LocalFunctionMap->empty())
    return;

  // Serialize the map into an in-memory byte buffer.
  SmallVector<char> Bytes;
  raw_svector_ostream Stream(Bytes);
  StableFunctionMapRecord::serialize(Stream, LocalFunctionMap.get());

  std::unique_ptr<MemoryBuffer> MapBuffer = MemoryBuffer::getMemBuffer(
      Stream.str(), "in-memory stable function map", false);

  // The section name depends on the object format of the target triple.
  Triple TargetTriple(M.getTargetTriple());
  embedBufferInModule(
      M, *MapBuffer,
      getCodeGenDataSectionName(CG_merge, TargetTriple.getObjectFormat()),
      Align(4));
}
|
||||
|
||||
/// Run global function merging on \p M.
///
/// \returns the result of merge(), i.e. whether \p M was changed.
bool GlobalMergeFunc::run(Module &M) {
  initializeMergerMode(M);

  // Use the prior CG data to optimistically create global merge candidates.
  if (MergerMode == HashFunctionMode::UsingHashFunction)
    return merge(M, cgdata::getStableFunctionMap());

  // Otherwise build the function map from this module and merge locally.
  analyze(M);
  // Emit the local function map to the custom section, __llvm_merge before
  // finalizing it.
  if (MergerMode == HashFunctionMode::BuildingHashFuncion)
    emitFunctionMap(M);
  LocalFunctionMap->finalize();
  return merge(M, LocalFunctionMap.get());
}
|
||||
|
||||
namespace {
|
||||
|
||||
class GlobalMergeFuncPassWrapper : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
GlobalMergeFuncPassWrapper();
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
|
||||
AU.setPreservesAll();
|
||||
ModulePass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return "Global Merge Functions"; }
|
||||
|
||||
bool runOnModule(Module &M) override;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
char GlobalMergeFuncPassWrapper::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func",
|
||||
"Global merge function pass", false, false)
|
||||
INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func",
|
||||
"Global merge function pass", false, false)
|
||||
|
||||
namespace llvm {
|
||||
ModulePass *createGlobalMergeFuncPass() {
|
||||
return new GlobalMergeFuncPassWrapper();
|
||||
}
|
||||
} // namespace llvm
|
||||
|
||||
/// Construct the wrapper and register the pass with the global pass registry.
GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) {
  PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry();
  initializeGlobalMergeFuncPassWrapperPass(Registry);
}
|
||||
|
||||
/// Run GlobalMergeFunc on \p M, passing the module summary index when the
/// wrapper pass providing it is available and a null index otherwise.
bool GlobalMergeFuncPassWrapper::runOnModule(Module &M) {
  auto *SummaryWrapper =
      getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
  const ModuleSummaryIndex *Index =
      SummaryWrapper ? SummaryWrapper->getIndex() : nullptr;

  GlobalMergeFunc Merger(Index);
  return Merger.run(M);
}
|
||||
|
||||
/// New pass manager entry point: run GlobalMergeFunc on \p M with the module
/// summary index obtained from \p AM.
///
/// \returns PreservedAnalyses::none() if the module changed, otherwise
///          PreservedAnalyses::all().
PreservedAnalyses GlobalMergeFuncPass::run(Module &M,
                                           AnalysisManager<Module> &AM) {
  ModuleSummaryIndex &Summary = AM.getResult<ModuleSummaryIndexAnalysis>(M);
  if (GlobalMergeFunc(&Summary).run(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
|
@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
|
||||
"Disable all outlining"),
|
||||
// Sentinel value for unspecified option.
|
||||
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
|
||||
static cl::opt<bool> EnableGlobalMergeFunc(
|
||||
"enable-global-merge-func", cl::Hidden,
|
||||
cl::desc("Enable global merge functions that are based on hash function"));
|
||||
// Disable the pass to fix unwind information. Whether the pass is included in
|
||||
// the pipeline is controlled via the target options, this option serves as
|
||||
// manual override.
|
||||
@ -489,6 +492,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
|
||||
|
||||
SET_BOOLEAN_OPTION(EarlyLiveIntervals)
|
||||
SET_BOOLEAN_OPTION(EnableBlockPlacementStats)
|
||||
SET_BOOLEAN_OPTION(EnableGlobalMergeFunc)
|
||||
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
|
||||
SET_BOOLEAN_OPTION(EnableMachineOutliner)
|
||||
SET_BOOLEAN_OPTION(MISchedPostRA)
|
||||
@ -884,6 +888,9 @@ void TargetPassConfig::addIRPasses() {
|
||||
// Convert conditional moves to conditional jumps when profitable.
|
||||
if (getOptLevel() != CodeGenOptLevel::None && !DisableSelectOptimize)
|
||||
addPass(createSelectOptimizePass());
|
||||
|
||||
if (EnableGlobalMergeFunc)
|
||||
addPass(createGlobalMergeFuncPass());
|
||||
}
|
||||
|
||||
/// Turn exception handling constructs into something the code generators can
|
||||
|
@ -91,6 +91,7 @@
|
||||
#include "llvm/CodeGen/FinalizeISel.h"
|
||||
#include "llvm/CodeGen/GCMetadata.h"
|
||||
#include "llvm/CodeGen/GlobalMerge.h"
|
||||
#include "llvm/CodeGen/GlobalMergeFunctions.h"
|
||||
#include "llvm/CodeGen/HardwareLoops.h"
|
||||
#include "llvm/CodeGen/IndirectBrExpand.h"
|
||||
#include "llvm/CodeGen/InterleavedAccess.h"
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ScopedNoAliasAA.h"
|
||||
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
|
||||
#include "llvm/CodeGen/GlobalMergeFunctions.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Passes/OptimizationLevel.h"
|
||||
|
@ -70,6 +70,7 @@ MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
|
||||
MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable))
|
||||
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
|
||||
MODULE_PASS("function-import", FunctionImportPass())
|
||||
MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
|
||||
MODULE_PASS("globalopt", GlobalOptPass())
|
||||
MODULE_PASS("globalsplit", GlobalSplitPass())
|
||||
MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
|
||||
|
77
llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
Normal file
77
llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
Normal file
@ -0,0 +1,77 @@
|
||||
; This test checks if two similar functions, f1 and f2, can be merged locally within a single module
|
||||
; while parameterizing a difference in their global variables, g1 and g2.
|
||||
; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
|
||||
; which are tail-called from thunks f1 and f2 respectively.
|
||||
; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF).
|
||||
|
||||
; RUN: opt -S --passes=global-merge-func %s | FileCheck %s
|
||||
|
||||
; A merging instance is created with an additional parameter.
|
||||
; CHECK: define internal i32 @f1.Tgm(i32 %0, ptr %1)
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: %idxprom = sext i32 %0 to i64
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom
|
||||
; CHECK-NEXT: %2 = load i32, ptr %arrayidx, align 4
|
||||
; CHECK-NEXT: %3 = load volatile i32, ptr %1, align 4
|
||||
; CHECK-NEXT: %mul = mul nsw i32 %3, %2
|
||||
; CHECK-NEXT: %add = add nsw i32 %mul, 1
|
||||
; CHECK-NEXT: ret i32 %add
|
||||
|
||||
; The original function becomes a thunk passing g1.
|
||||
; CHECK: define i32 @f1(i32 %a)
|
||||
; CHECK-NEXT: %1 = tail call i32 @f1.Tgm(i32 %a, ptr @g1)
|
||||
; CHECK-NEXT: ret i32 %1
|
||||
|
||||
; The same sequence is produced for f2.Tgm.
|
||||
; CHECK: define internal i32 @f2.Tgm(i32 %0, ptr %1)
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: %idxprom = sext i32 %0 to i64
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom
|
||||
; CHECK-NEXT: %2 = load i32, ptr %arrayidx, align 4
|
||||
; CHECK-NEXT: %3 = load volatile i32, ptr %1, align 4
|
||||
; CHECK-NEXT: %mul = mul nsw i32 %3, %2
|
||||
; CHECK-NEXT: %add = add nsw i32 %mul, 1
|
||||
; CHECK-NEXT: ret i32 %add
|
||||
|
||||
; The original function becomes a thunk passing g2.
|
||||
; CHECK: define i32 @f2(i32 %a)
|
||||
; CHECK-NEXT: %1 = tail call i32 @f2.Tgm(i32 %a, ptr @g2)
|
||||
; CHECK-NEXT: ret i32 %1
|
||||
|
||||
; RUN: llc -enable-global-merge-func=true < %s | FileCheck %s --check-prefix=MERGE
|
||||
; RUN: llc -enable-global-merge-func=false < %s | FileCheck %s --check-prefix=NOMERGE
|
||||
|
||||
; MERGE: _f1.Tgm
|
||||
; MERGE: _f2.Tgm
|
||||
|
||||
; NOMERGE-NOT: _f1.Tgm
|
||||
; NOMERGE-NOT: _f2.Tgm
|
||||
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g1, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g2, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
82
llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
Normal file
82
llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
Normal file
@ -0,0 +1,82 @@
|
||||
; This test demonstrates how similar functions are handled during global merging.
|
||||
; Currently, we do not attempt to share a merged function for identical sequences.
|
||||
; Instead, each merging instance is created uniquely.
|
||||
|
||||
; RUN: rm -rf %t; split-file %s %t
|
||||
|
||||
; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
|
||||
; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
|
||||
|
||||
; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
|
||||
; Using llvm-cgdata, merge the cg data.
|
||||
; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \
|
||||
; RUN: -r %t-foo.bc,_f1,px \
|
||||
; RUN: -r %t-goo.bc,_f2,px \
|
||||
; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
|
||||
; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
|
||||
; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2
|
||||
|
||||
; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary.
|
||||
; Each module has its own merging instance as it is matched against the merged cgdata.
|
||||
; RUN: llvm-lto2 run -enable-global-merge-func=true \
|
||||
; RUN: -codegen-data-use-path=%tout.cgdata \
|
||||
; RUN: %t-foo.bc %t-goo.bc -o %tout-read \
|
||||
; RUN: -r %t-foo.bc,_f1,px \
|
||||
; RUN: -r %t-goo.bc,_f2,px \
|
||||
; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
|
||||
; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
|
||||
; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1
|
||||
; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2
|
||||
; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1
|
||||
; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2
|
||||
|
||||
; READ1: _f1.Tgm
|
||||
; READ2: _f2.Tgm
|
||||
|
||||
; THUNK1: <_f1>:
|
||||
; THUNK1-NEXT: adrp x1,
|
||||
; THUNK1-NEXT: ldr x1, [x1]
|
||||
; THUNK1-NEXT: b
|
||||
|
||||
; THUNK2: <_f2>:
|
||||
; THUNK2-NEXT: adrp x1,
|
||||
; THUNK2-NEXT: ldr x1, [x1]
|
||||
; THUNK2-NEXT: b
|
||||
|
||||
;--- foo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g1, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
;--- goo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g2, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
68
llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
Normal file
68
llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
Normal file
@ -0,0 +1,68 @@
|
||||
; TODO: This test checks how similar functions are handled during global merging
|
||||
; by repeating the codegen via -codegen-data-thinlto-two-rounds=true.
|
||||
|
||||
; RUN: rm -rf %t; split-file %s %t
|
||||
|
||||
; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
|
||||
; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
|
||||
|
||||
; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \
|
||||
; RUN: -r %t-foo.bc,_f1,px \
|
||||
; RUN: -r %t-goo.bc,_f2,px \
|
||||
; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
|
||||
; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
|
||||
; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1
|
||||
; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2
|
||||
; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1
|
||||
; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2
|
||||
|
||||
; OUT1: _f1.Tgm
|
||||
; OUT2: _f2.Tgm
|
||||
|
||||
; THUNK1: <_f1>:
|
||||
; THUNK1-NEXT: adrp x1,
|
||||
; THUNK1-NEXT: ldr x1, [x1]
|
||||
; THUNK1-NEXT: b
|
||||
|
||||
; THUNK2: <_f2>:
|
||||
; THUNK2-NEXT: adrp x1,
|
||||
; THUNK2-NEXT: ldr x1, [x1]
|
||||
; THUNK2-NEXT: b
|
||||
|
||||
;--- foo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g1, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
;--- goo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g2, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
97
llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
Normal file
97
llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
Normal file
@ -0,0 +1,97 @@
|
||||
; This test verifies whether a stable function is encoded into the __llvm_merge section
|
||||
; when the -codegen-data-generate flag is used under -enable-global-merge-func=true.
|
||||
|
||||
; RUN: rm -rf %t; split-file %s %t
|
||||
|
||||
; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
|
||||
; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
|
||||
|
||||
; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \
|
||||
; RUN: -r %t-foo.bc,_f1,px \
|
||||
; RUN: -r %t-goo.bc,_f2,px \
|
||||
; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
|
||||
; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
|
||||
; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE
|
||||
; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE
|
||||
|
||||
; No merge instance is locally created as each module has a singleton function.
|
||||
; NOWRITE-NOT: _f1.Tgm
|
||||
; NOWRITE-NOT: _f2.Tgm
|
||||
|
||||
; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \
|
||||
; RUN: -r %t-foo.bc,_f1,px \
|
||||
; RUN: -r %t-goo.bc,_f2,px \
|
||||
; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
|
||||
; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
|
||||
; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE
|
||||
; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE
|
||||
; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME
|
||||
; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME
|
||||
|
||||
; On a write mode, no merging happens yet for each module.
|
||||
; We only create stable functions and publish them into __llvm_merge section for each object.
|
||||
; WRITE-NOT: _f1.Tgm
|
||||
; WRITE-NOT: _f2.Tgm
|
||||
; SECTNAME: __llvm_merge
|
||||
|
||||
; Merge the cgdata using llvm-cgdata.
|
||||
; We now validate the content of the merged cgdata.
|
||||
; Two functions have the same hash with only one different constant at the same location.
|
||||
; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
|
||||
; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s
|
||||
|
||||
; CHECK: - Hash: [[#%d,HASH:]]
|
||||
; CHECK-NEXT: FunctionName: f1
|
||||
; CHECK-NEXT: ModuleName: {{.*}}
|
||||
; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]]
|
||||
; CHECK-NEXT: IndexOperandHashes:
|
||||
; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]]
|
||||
; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]]
|
||||
; CHECK-NEXT: OpndHash: {{.*}}
|
||||
|
||||
; CHECK: - Hash: [[#%d,HASH]]
|
||||
; CHECK-NEXT: FunctionName: f2
|
||||
; CHECK-NEXT: ModuleName: {{.*}}
|
||||
; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]]
|
||||
; CHECK-NEXT: IndexOperandHashes:
|
||||
; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]]
|
||||
; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]]
|
||||
; CHECK-NEXT: OpndHash: {{.*}}
|
||||
|
||||
;--- foo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g1, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
;--- goo.ll
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-unknown-ios12.0.0"
|
||||
|
||||
@g = external local_unnamed_addr global [0 x i32], align 4
|
||||
@g1 = external global i32, align 4
|
||||
@g2 = external global i32, align 4
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
entry:
|
||||
%idxprom = sext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = load volatile i32, i32* @g2, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%add = add nsw i32 %mul, 1
|
||||
ret i32 %add
|
||||
}
|
@ -21,7 +21,7 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hasht
|
||||
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o
|
||||
|
||||
# Merge an object file having cgdata (__llvm_outline and __llvm_merge)
|
||||
RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
|
||||
RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
|
||||
RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
|
||||
|
||||
CHECK: Outlined hash tree:
|
||||
|
@ -21,7 +21,7 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
|
||||
RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
|
||||
|
||||
# Merge the archive into the codegen data file.
|
||||
RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata
|
||||
RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata
|
||||
RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
|
||||
|
||||
RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
|
||||
|
@ -15,7 +15,7 @@ RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed
|
||||
RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll
|
||||
|
||||
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
|
||||
RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata
|
||||
RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata
|
||||
RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
|
||||
|
||||
CHECK: Stable function map:
|
||||
|
@ -18,7 +18,7 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll >
|
||||
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
|
||||
|
||||
# Merge two object files into the codegen data file.
|
||||
RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
|
||||
RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
|
||||
|
||||
RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
|
||||
CHECK: Stable function map:
|
||||
|
@ -13,7 +13,7 @@ RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-temp
|
||||
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
|
||||
|
||||
# Merge an object file having cgdata (__llvm_merge)
|
||||
RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata
|
||||
RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata
|
||||
RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
|
||||
CHECK: Stable function map:
|
||||
CHECK-NEXT: Unique hash Count: 1
|
||||
|
@ -24,6 +24,7 @@ def : F<"m", "Alias for --merge">, Alias<merge>, Group<action_group>;
|
||||
|
||||
// Additional options
|
||||
def cgdata_version : FF<"cgdata-version", "Display the cgdata version">;
|
||||
def skip_trim : FF<"skip-trim", "Skip trimming content when merging the cgdata">;
|
||||
def output : Option<["--"], "output", KIND_SEPARATE>,
|
||||
HelpText<"Specify the name for the output file to be created">, MetaVarName<"<file>">;
|
||||
def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpText<"Alias for --output">;
|
||||
|
@ -76,6 +76,7 @@ static StringRef ToolName;
|
||||
static StringRef OutputFilename = "-";
|
||||
static StringRef Filename;
|
||||
static bool ShowCGDataVersion;
|
||||
static bool SkipTrim;
|
||||
static CGDataAction Action;
|
||||
static std::optional<CGDataFormat> OutputFormat;
|
||||
static std::vector<std::string> InputFilenames;
|
||||
@ -214,7 +215,7 @@ static int merge_main(int argc, const char *argv[]) {
|
||||
if (!Result)
|
||||
exitWithError("failed to merge codegen data files.");
|
||||
|
||||
GlobalFunctionMapRecord.finalize();
|
||||
GlobalFunctionMapRecord.finalize(SkipTrim);
|
||||
|
||||
CodeGenDataWriter Writer;
|
||||
if (!GlobalOutlineRecord.empty())
|
||||
@ -301,6 +302,7 @@ static void parseArgs(int argc, char **argv) {
|
||||
}
|
||||
|
||||
ShowCGDataVersion = Args.hasArg(OPT_cgdata_version);
|
||||
SkipTrim = Args.hasArg(OPT_skip_trim);
|
||||
|
||||
if (opt::Arg *A = Args.getLastArg(OPT_format)) {
|
||||
StringRef OF = A->getValue();
|
||||
|
@ -108,8 +108,8 @@ TEST(StableFunctionMap, Finalize2) {
|
||||
|
||||
TEST(StableFunctionMap, Finalize3) {
|
||||
StableFunctionMap Map;
|
||||
StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 1}, 1}}};
|
||||
StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
|
||||
StableFunction Func1{1, "Func1", "Mod1", 12, {{{0, 1}, 3}, {{1, 1}, 1}}};
|
||||
StableFunction Func2{1, "Func2", "Mod2", 12, {{{0, 1}, 2}, {{1, 1}, 1}}};
|
||||
Map.insert(Func1);
|
||||
Map.insert(Func2);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user