Extend MemProfUse pass to make use of data access profiles
This commit is contained in:
parent
0844812b2e
commit
925e9fd60c
@ -729,6 +729,11 @@ public:
|
||||
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
|
||||
getMemProfCallerCalleePairs() const;
|
||||
|
||||
// Returns a non-owning pointer to the data access profile data; ownership
// stays with this object's DataAccessProfileData member, so callers must not
// delete the result.
// NOTE(review): presumably null when the profile carries no data access
// section -- confirm against where DataAccessProfileData is populated.
|
||||
memprof::DataAccessProfData *getDataAccessProfileData() const {
|
||||
return DataAccessProfileData.get();
|
||||
}
|
||||
|
||||
// Return the entire MemProf profile.
|
||||
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
|
||||
|
||||
@ -900,6 +905,12 @@ public:
|
||||
return MemProfReader.getSummary();
|
||||
}
|
||||
|
||||
/// Returns a non-owning pointer to the data access profile data, forwarded
/// from MemProfReader; callers must not delete the result.
|
||||
/// Will be null if unavailable (version < 4).
|
||||
memprof::DataAccessProfData *getDataAccessProfileData() const {
|
||||
return MemProfReader.getDataAccessProfileData();
|
||||
}
|
||||
|
||||
Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
|
||||
Error printBinaryIds(raw_ostream &OS) override;
|
||||
};
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/ProfileData/DataAccessProf.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/ProfileData/InstrProfReader.h"
|
||||
#include "llvm/ProfileData/MemProfCommon.h"
|
||||
@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
|
||||
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
|
||||
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
|
||||
|
||||
// Hidden, off-by-default switch (-annotate-static-data-prefix). When it is
// false, MemProfUsePass::run returns before assigning any section prefix to
// global variables.
static cl::opt<bool> AnnotationStaticDataPrefix(
|
||||
"annotate-static-data-prefix", cl::init(false), cl::Hidden,
|
||||
cl::desc("If true, annotate the static data section prefix"));
|
||||
|
||||
// Matching statistics
|
||||
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
|
||||
STATISTIC(NumOfMemProfMismatch,
|
||||
@ -750,5 +755,34 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
|
||||
}
|
||||
}
|
||||
|
||||
memprof::DataAccessProfData *DataAccessProf =
|
||||
MemProfReader->getDataAccessProfileData();
|
||||
|
||||
if (!AnnotationStaticDataPrefix || !DataAccessProf) {
|
||||
return PreservedAnalyses::none();
|
||||
}
|
||||
|
||||
for (GlobalVariable &GVar : M.globals()) {
|
||||
if (GVar.isDeclarationForLinker())
|
||||
continue;
|
||||
StringRef Name = GVar.getName();
|
||||
// TODO: Evaluate string hashing options (preferably `stable_hash_name`
|
||||
// from llvm/ADT/StableHashing.h) for string literals and annotate string
|
||||
// hotness prefixes.
|
||||
if (Name.starts_with(".str"))
|
||||
continue;
|
||||
|
||||
// DataAccessProfData's look-up methods will canonicalize the variable
|
||||
// name before performing the look-up, so the optimizer doesn't need to do it.
|
||||
std::optional<DataAccessProfRecord> Record =
|
||||
DataAccessProf->getProfileRecord(Name);
|
||||
// For now, regard a global variable as hot if it has a non-zero sampled count.
|
||||
if (Record && Record->AccessCount > 0)
|
||||
GVar.setSectionPrefix("hot");
|
||||
|
||||
if (DataAccessProf->isKnownColdSymbol(Name))
|
||||
GVar.setSectionPrefix("unlikely");
|
||||
}
|
||||
|
||||
return PreservedAnalyses::none();
|
||||
}
|
||||
|
76
llvm/test/Transforms/PGOProfile/data-access-profile.ll
Normal file
76
llvm/test/Transforms/PGOProfile/data-access-profile.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
|
||||
;; Read a text profile and merge it into indexed profile.
|
||||
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
|
||||
|
||||
;; Run optimizer pass on the IR, and check the section prefix.
|
||||
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -annotate-static-data-prefix -S input.ll -o - | FileCheck %s
|
||||
|
||||
;; String literals are not annotated.
|
||||
; CHECK: @.str = unnamed_addr constant [5 x i8] c"abcde"
|
||||
; CHECK-NOT: section_prefix
|
||||
; CHECK: @var1 = global i32 123, !section_prefix !0
|
||||
|
||||
;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up.
|
||||
; CHECK-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
|
||||
; CHECK-NEXT: @foo = global i8 2, !section_prefix !1
|
||||
|
||||
;; @bar is not seen in the hot symbol or known cold symbol set, so it doesn't get
|
||||
;; a section prefix. It's up to the linker to decide how to map input sections
|
||||
;; to output, and one conservative practice is to map unlikely-prefixed ones to
|
||||
;; unlikely output section, and map the rest (hot-prefixed or prefix-less) to
|
||||
;; the canonical output section.
|
||||
; CHECK-NEXT: @bar = global i16 3
|
||||
|
||||
; CHECK: !0 = !{!"section_prefix", !"hot"}
|
||||
; CHECK-NEXT: !1 = !{!"section_prefix", !"unlikely"}
|
||||
|
||||
;--- memprof.yaml
|
||||
---
|
||||
HeapProfileRecords:
|
||||
- GUID: 0xdeadbeef12345678
|
||||
AllocSites:
|
||||
- Callstack:
|
||||
- { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
|
||||
- { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
|
||||
MemInfoBlock:
|
||||
AllocCount: 111
|
||||
TotalSize: 222
|
||||
TotalLifetime: 333
|
||||
TotalLifetimeAccessDensity: 444
|
||||
CallSites:
|
||||
- Frames:
|
||||
- { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
|
||||
- { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
|
||||
CalleeGuids: [ 0x100, 0x200 ]
|
||||
DataAccessProfiles:
|
||||
SampledRecords:
|
||||
- Symbol: var1
|
||||
AccessCount: 1000
|
||||
- Symbol: var2
|
||||
AccessCount: 5
|
||||
- Hash: 101010
|
||||
AccessCount: 145
|
||||
KnownColdSymbols:
|
||||
- foo
|
||||
KnownColdStrHashes: [ 999, 1001 ]
|
||||
...
|
||||
;--- input.ll
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@.str = unnamed_addr constant [5 x i8] c"abcde"
|
||||
@var1 = global i32 123
|
||||
@var2.llvm.125 = global i64 0
|
||||
@foo = global i8 2
|
||||
@bar = global i16 3
|
||||
|
||||
;; Loads @var1 and @var2.llvm.125 and passes both values to the external
;; varargs function @func_taking_arbitrary_param, returning its result.
;; NOTE(review): @var2.llvm.125 is declared as i64 above but is read here as
;; i32 -- confirm this is intentional for the test.
define i32 @func() {
|
||||
%a = load i32, ptr @var1
|
||||
%b = load i32, ptr @var2.llvm.125
|
||||
%ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
declare i32 @func_taking_arbitrary_param(...)
|
Loading…
x
Reference in New Issue
Block a user