[MemProf] Convert removal of memprof attrs and metadata to a pass (#163841)
In preparation for a follow on fix that removes these attributes and metadata in non-LTO pipelines, convert updateMemProfAttributes to a new MemProfRemoveInfo pass that executes at the start of the LTO backend pass pipelines when we don't have an index indicating that we linked with a library support hot cold operator new. This is largely NFC from an end user perspective but changes where the removal can be observed, hence the test updates. A follow on change will use the new pass for non-LTO pipelines (for cases when the bitcode is initially matched with memprof data but we decide to complete the compile without LTO).
This commit is contained in:
parent
36c9b4fd6d
commit
2a7e7e2ac4
@ -22,7 +22,7 @@
|
||||
|
||||
; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj
|
||||
|
||||
; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck %s --check-prefix=CHECK-IR
|
||||
; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
|
||||
; CHECK-IR: !memprof {{.*}} !callsite
|
||||
; CHECK-IR: "memprof"="cold"
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
|
||||
; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj
|
||||
|
||||
; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck %s \
|
||||
; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s \
|
||||
; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
|
||||
; RUN: --implicit-check-not "memprof"="cold"
|
||||
|
||||
|
||||
@ -105,12 +105,6 @@ setupStatsFile(StringRef StatsFilename);
|
||||
/// ordered indices to elements in the input array.
|
||||
LLVM_ABI std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
|
||||
|
||||
/// Updates MemProf attributes (and metadata) based on whether the index
|
||||
/// has recorded that we are linking with allocation libraries containing
|
||||
/// the necessary APIs for downstream transformations.
|
||||
LLVM_ABI void updateMemProfAttributes(Module &Mod,
|
||||
const ModuleSummaryIndex &Index);
|
||||
|
||||
class LTO;
|
||||
struct SymbolResolution;
|
||||
|
||||
|
||||
@ -95,6 +95,16 @@ public:
|
||||
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
|
||||
isPrevailing);
|
||||
};
|
||||
|
||||
/// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
|
||||
/// when we don't have an index that has recorded that we are linking with
|
||||
/// allocation libraries containing the necessary APIs for downstream
|
||||
/// transformations.
|
||||
class MemProfRemoveInfo : public PassInfoMixin<MemProfRemoveInfo> {
|
||||
public:
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
|
||||
|
||||
@ -1257,38 +1257,6 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
|
||||
return Result;
|
||||
}
|
||||
|
||||
void lto::updateMemProfAttributes(Module &Mod,
|
||||
const ModuleSummaryIndex &Index) {
|
||||
llvm::TimeTraceScope timeScope("LTO update memprof attributes");
|
||||
if (Index.withSupportsHotColdNew())
|
||||
return;
|
||||
|
||||
// The profile matcher applies hotness attributes directly for allocations,
|
||||
// and those will cause us to generate calls to the hot/cold interfaces
|
||||
// unconditionally. If supports-hot-cold-new was not enabled in the LTO
|
||||
// link then assume we don't want these calls (e.g. not linking with
|
||||
// the appropriate library, or otherwise trying to disable this behavior).
|
||||
for (auto &F : Mod) {
|
||||
for (auto &BB : F) {
|
||||
for (auto &I : BB) {
|
||||
auto *CI = dyn_cast<CallBase>(&I);
|
||||
if (!CI)
|
||||
continue;
|
||||
if (CI->hasFnAttr("memprof"))
|
||||
CI->removeFnAttr("memprof");
|
||||
// Strip off all memprof metadata as it is no longer needed.
|
||||
// Importantly, this avoids the addition of new memprof attributes
|
||||
// after inlining propagation.
|
||||
// TODO: If we support additional types of MemProf metadata beyond hot
|
||||
// and cold, we will need to update the metadata based on the allocator
|
||||
// APIs supported instead of completely stripping all.
|
||||
CI->setMetadata(LLVMContext::MD_memprof, nullptr);
|
||||
CI->setMetadata(LLVMContext::MD_callsite, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Error LTO::runRegularLTO(AddStreamFn AddStream) {
|
||||
llvm::TimeTraceScope timeScope("Run regular LTO");
|
||||
LLVMContext &CombinedCtx = RegularLTO.CombinedModule->getContext();
|
||||
@ -1346,8 +1314,6 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
|
||||
}
|
||||
}
|
||||
|
||||
updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
|
||||
|
||||
bool WholeProgramVisibilityEnabledInLTO =
|
||||
Conf.HasWholeProgramVisibility &&
|
||||
// If validation is enabled, upgrade visibility only when all vtables
|
||||
|
||||
@ -726,7 +726,6 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
|
||||
}
|
||||
|
||||
// Do this after any importing so that imported code is updated.
|
||||
updateMemProfAttributes(Mod, CombinedIndex);
|
||||
updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
|
||||
|
||||
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
|
||||
|
||||
@ -1803,6 +1803,12 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
|
||||
OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
|
||||
ModulePassManager MPM;
|
||||
|
||||
// If we are invoking this without a summary index noting that we are linking
|
||||
// with a library containing the necessary APIs, remove any MemProf related
|
||||
// attributes and metadata.
|
||||
if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
|
||||
MPM.addPass(MemProfRemoveInfo());
|
||||
|
||||
if (ImportSummary) {
|
||||
// For ThinLTO we must apply the context disambiguation decisions early, to
|
||||
// ensure we can correctly match the callsites to summary data.
|
||||
@ -1874,6 +1880,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
|
||||
|
||||
invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
|
||||
|
||||
// If we are invoking this without a summary index noting that we are linking
|
||||
// with a library containing the necessary APIs, remove any MemProf related
|
||||
// attributes and metadata.
|
||||
if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
|
||||
MPM.addPass(MemProfRemoveInfo());
|
||||
|
||||
// Create a function that performs CFI checks for cross-DSO calls with targets
|
||||
// in the current module.
|
||||
MPM.addPass(CrossDSOCFIPass());
|
||||
|
||||
@ -113,6 +113,7 @@ MODULE_PASS("pgo-force-function-attrs",
|
||||
? PGOOpt->ColdOptType
|
||||
: PGOOptions::ColdFuncOpt::Default))
|
||||
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
|
||||
MODULE_PASS("memprof-remove-attributes", MemProfRemoveInfo())
|
||||
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
|
||||
MODULE_PASS("mergefunc", MergeFunctionsPass())
|
||||
MODULE_PASS("metarenamer", MetaRenamerPass())
|
||||
|
||||
@ -6150,3 +6150,42 @@ void MemProfContextDisambiguation::run(
|
||||
IndexCallsiteContextGraph CCG(Index, isPrevailing);
|
||||
CCG.process();
|
||||
}
|
||||
|
||||
// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
|
||||
// when we don't have an index that has recorded that we are linking with
|
||||
// allocation libraries containing the necessary APIs for downstream
|
||||
// transformations.
|
||||
PreservedAnalyses MemProfRemoveInfo::run(Module &M, ModuleAnalysisManager &AM) {
|
||||
// The profile matcher applies hotness attributes directly for allocations,
|
||||
// and those will cause us to generate calls to the hot/cold interfaces
|
||||
// unconditionally. If supports-hot-cold-new was not enabled in the LTO
|
||||
// link then assume we don't want these calls (e.g. not linking with
|
||||
// the appropriate library, or otherwise trying to disable this behavior).
|
||||
bool Changed = false;
|
||||
for (auto &F : M) {
|
||||
for (auto &BB : F) {
|
||||
for (auto &I : BB) {
|
||||
auto *CI = dyn_cast<CallBase>(&I);
|
||||
if (!CI)
|
||||
continue;
|
||||
if (CI->hasFnAttr("memprof")) {
|
||||
CI->removeFnAttr("memprof");
|
||||
Changed = true;
|
||||
}
|
||||
if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
|
||||
assert(!CI->hasMetadata(LLVMContext::MD_memprof));
|
||||
continue;
|
||||
}
|
||||
// Strip off all memprof metadata as it is no longer needed.
|
||||
// Importantly, this avoids the addition of new memprof attributes
|
||||
// after inlining propagation.
|
||||
CI->setMetadata(LLVMContext::MD_memprof, nullptr);
|
||||
CI->setMetadata(LLVMContext::MD_callsite, nullptr);
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
return PreservedAnalyses::none();
|
||||
}
|
||||
|
||||
@ -13,14 +13,14 @@
|
||||
; RUN: -r=%t.o,main,plx \
|
||||
; RUN: -r=%t.o,_Znam, \
|
||||
; RUN: -memprof-dump-ccg \
|
||||
; RUN: -save-temps \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
; DUMP: Callsite Context Graph:
|
||||
; RUN: -print-before=memprof-context-disambiguation \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR
|
||||
|
||||
; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s --check-prefix=IR
|
||||
; IR: !memprof {{.*}} !callsite
|
||||
; IR: "memprof"="cold"
|
||||
|
||||
; DUMP: Callsite Context Graph:
|
||||
|
||||
;; Next check without -supports-hot-cold-new, we should not perform
|
||||
;; context disambiguation, and we should strip memprof metadata and
|
||||
;; attributes before optimization.
|
||||
@ -28,13 +28,11 @@
|
||||
; RUN: -r=%t.o,main,plx \
|
||||
; RUN: -r=%t.o,_Znam, \
|
||||
; RUN: -memprof-dump-ccg \
|
||||
; RUN: -save-temps \
|
||||
; RUN: -print-before=memprof-context-disambiguation \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \
|
||||
; RUN: --implicit-check-not "Callsite Context Graph:"
|
||||
|
||||
; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s \
|
||||
; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
|
||||
; RUN: --implicit-check-not "memprof"="cold"
|
||||
; RUN: --implicit-check-not "Callsite Context Graph:" \
|
||||
; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
|
||||
; RUN: --implicit-check-not "memprof"="cold"
|
||||
|
||||
source_filename = "memprof-supports-hot-cold-new.ll"
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
@ -17,11 +17,12 @@
|
||||
; RUN: -r=%t/foo.o,foo,plx \
|
||||
; RUN: -r=%t/foo.o,_Znam, \
|
||||
; RUN: -memprof-dump-ccg \
|
||||
; RUN: -save-temps \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
; RUN: -print-before=memprof-context-disambiguation \
|
||||
; RUN: -thinlto-threads=1 \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR
|
||||
|
||||
; DUMP: Callsite Context Graph:
|
||||
|
||||
; RUN: llvm-dis %t.out.1.3.import.bc -o - | FileCheck %s --check-prefix=IR
|
||||
; IR: @main()
|
||||
; IR: !memprof {{.*}} !callsite
|
||||
; IR: @_Znam(i64 0) #[[ATTR:[0-9]+]]
|
||||
@ -41,13 +42,12 @@
|
||||
; RUN: -r=%t/foo.o,foo,plx \
|
||||
; RUN: -r=%t/foo.o,_Znam, \
|
||||
; RUN: -memprof-dump-ccg \
|
||||
; RUN: -save-temps \
|
||||
; RUN: -print-before=memprof-context-disambiguation \
|
||||
; RUN: -thinlto-threads=1 \
|
||||
; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \
|
||||
; RUN: --implicit-check-not "Callsite Context Graph:"
|
||||
|
||||
; RUN: llvm-dis %t.out.1.3.import.bc -o - | FileCheck %s \
|
||||
; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
|
||||
; RUN: --implicit-check-not "memprof"="cold"
|
||||
; RUN: --implicit-check-not "Callsite Context Graph:" \
|
||||
; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
|
||||
; RUN: --implicit-check-not "memprof"="cold"
|
||||
|
||||
;--- main.ll
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user