[MemProf] Disable hot hints by default (#124338)
By default we were marking some contexts as hot, and adding hot hints to unambiguously hot allocations. However, there is not yet support for cloning to expose hot allocation contexts, and none is planned for the foreseeable future. While we convert hot contexts to notcold contexts during the cloning step, their existence was greatly limiting the context trimming performed when we add the MemProf profile to the IR. This change simply disables the generation of hot contexts / hints by default, as few allocations were unambiguously hot. A subsequent change will address the issue when hot hints are optionally enabled. See PR124219 for details. This change resulted in significant overhead reductions for a large target: ~48% reduction in the per-module ThinLTO bitcode summary sizes; ~72% reduction in the distributed ThinLTO bitcode combined summary sizes; ~68% reduction in thin link time; ~34% reduction in thin link peak memory.
This commit is contained in:
parent
074a25fb26
commit
ae8b560899
@ -42,6 +42,11 @@ cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold(
|
||||
cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an "
|
||||
"allocation to be considered hot"));
|
||||
|
||||
cl::opt<bool>
|
||||
MemProfUseHotHints("memprof-use-hot-hints", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable use of hot hints (only supported for "
|
||||
"unambigously hot allocations)"));
|
||||
|
||||
cl::opt<bool> MemProfReportHintedSizes(
|
||||
"memprof-report-hinted-sizes", cl::init(false), cl::Hidden,
|
||||
cl::desc("Report total allocation sizes of hinted allocations"));
|
||||
@ -60,7 +65,8 @@ AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
|
||||
|
||||
// The access densities are multiplied by 100 to hold 2 decimal places of
|
||||
// precision, so need to divide by 100.
|
||||
if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
|
||||
if (MemProfUseHotHints &&
|
||||
((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
|
||||
MemProfMinAveLifetimeAccessDensityHotThreshold)
|
||||
return AllocationType::Hot;
|
||||
|
||||
|
@ -84,6 +84,8 @@
|
||||
; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2
|
||||
; RAND2: random hotness seed = 1730170724
|
||||
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdatarand2>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS
|
||||
;; Check with hot hints enabled
|
||||
; RUN: opt < %s -memprof-use-hot-hints -passes='memprof-use<profile-filename=%t.memprofdatarand2>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2HOT
|
||||
|
||||
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
|
||||
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
|
||||
@ -408,8 +410,15 @@ for.end: ; preds = %for.cond
|
||||
; MEMPROFRAND2: !"cold"
|
||||
; MEMPROFRAND2: !"cold"
|
||||
; MEMPROFRAND2: !"cold"
|
||||
; MEMPROFRAND2: !"hot"
|
||||
; MEMPROFRAND2: !"hot"
|
||||
; MEMPROFRAND2: !"notcold"
|
||||
; MEMPROFRAND2: !"notcold"
|
||||
|
||||
;; With hot hints enabled the last 2 should be hot.
|
||||
; MEMPROFRAND2HOT: !"cold"
|
||||
; MEMPROFRAND2HOT: !"cold"
|
||||
; MEMPROFRAND2HOT: !"cold"
|
||||
; MEMPROFRAND2HOT: !"hot"
|
||||
; MEMPROFRAND2HOT: !"hot"
|
||||
|
||||
; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile.
|
||||
; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
|
||||
|
@ -10,7 +10,9 @@
|
||||
;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm
|
||||
|
||||
; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata
|
||||
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s
|
||||
;; Set the minimum lifetime threshold to 0 to ensure that one context is
|
||||
;; considered cold (the other will be notcold).
|
||||
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s
|
||||
|
||||
;; Conservatively annotate as not cold. We get two messages as there are two
|
||||
;; unrolled copies of the allocation.
|
||||
|
@ -25,6 +25,7 @@ using namespace llvm::memprof;
|
||||
extern cl::opt<float> MemProfLifetimeAccessDensityColdThreshold;
|
||||
extern cl::opt<unsigned> MemProfAveLifetimeColdThreshold;
|
||||
extern cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold;
|
||||
extern cl::opt<bool> MemProfUseHotHints;
|
||||
|
||||
namespace {
|
||||
|
||||
@ -81,14 +82,23 @@ TEST_F(MemoryProfileInfoTest, GetAllocType) {
|
||||
// MemProfMinAveLifetimeAccessDensityHotThreshold
|
||||
// so compute the HotTotalLifetimeAccessDensityThreshold at the threshold.
|
||||
const uint64_t HotTotalLifetimeAccessDensityThreshold =
|
||||
(uint64_t)(MemProfMinAveLifetimeAccessDensityHotThreshold * AllocCount * 100);
|
||||
|
||||
(uint64_t)(MemProfMinAveLifetimeAccessDensityHotThreshold * AllocCount *
|
||||
100);
|
||||
|
||||
// Make sure the option for detecting hot allocations is set.
|
||||
MemProfUseHotHints = true;
|
||||
// Test Hot
|
||||
// More accesses per byte per sec than hot threshold is hot.
|
||||
EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
|
||||
ColdTotalLifetimeThreshold + 1),
|
||||
AllocationType::Hot);
|
||||
// Undo the manual set of the option above.
|
||||
cl::ResetAllOptionOccurrences();
|
||||
|
||||
// Without MemProfUseHotHints (default) we should treat simply as NotCold.
|
||||
EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
|
||||
ColdTotalLifetimeThreshold + 1),
|
||||
AllocationType::NotCold);
|
||||
|
||||
// Test Cold
|
||||
// Long lived with fewer accesses per byte per sec than cold threshold is cold.
|
||||
|
Loading…
x
Reference in New Issue
Block a user