[SampleProfileMatcher] Fix backward matching of non-anchor locations (#190118)

The backward matching loop in `matchNonCallsiteLocs` was ineffective
because `InsertMatching` used `std::unordered_map::insert()` which does
not overwrite existing entries. Since forward matching already inserted
entries for all non-anchor locations, the backward matching for the
second half was silently ignored.

The backward matching can update forward mappings in
`IRToProfileLocationMap` in 2 ways:
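- The IR location now maps to a different profile location. Change
`insert()` to `insert_or_assign()` so that the existing forward-matched
entry is overwritten.
- The IR location now maps to the same profile location (`From == To`).
Add `erase()` to remove the stale forward-matched entry, since unchanged
mappings are intentionally not stored in the map.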
This commit is contained in:
Wei Wang 2026-04-06 15:21:31 -07:00 committed by GitHub
parent e8566f83d2
commit 1ae179b325
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 106 additions and 6 deletions

View File

@ -234,10 +234,12 @@ SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
void SampleProfileMatcher::matchNonCallsiteLocs(
const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
LocToLocMap &IRToProfileLocationMap) {
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
auto UpdateMatching = [&](const LineLocation &From, const LineLocation &To) {
// Skip the unchanged location mapping to save memory.
if (From != To)
IRToProfileLocationMap.insert({From, To});
IRToProfileLocationMap.insert_or_assign(From, To);
else
IRToProfileLocationMap.erase(From);
};
// Use function's beginning location as the initial anchor.
@ -250,7 +252,7 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
auto R = MatchedAnchors.find(Loc);
if (R != MatchedAnchors.end()) {
const auto &Candidate = R->second;
InsertMatching(Loc, Candidate);
UpdateMatching(Loc, Candidate);
LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
<< " is matched from " << Loc << " to " << Candidate
<< "\n");
@ -258,14 +260,14 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
// Match backwards for non-anchor locations.
// The locations in LastMatchedNonAnchors have been matched forwards
// based on the previous anchor, spilt it evenly and overwrite the
// based on the previous anchor, split it evenly and overwrite the
// second half based on the current anchor.
for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
I < LastMatchedNonAnchors.size(); I++) {
const auto &L = LastMatchedNonAnchors[I];
uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
LineLocation Candidate(CandidateLineOffset, L.Discriminator);
InsertMatching(L, Candidate);
UpdateMatching(L, Candidate);
LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
<< " to " << Candidate << "\n");
}
@ -278,7 +280,7 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
if (!IsMatchedAnchor) {
uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
InsertMatching(Loc, Candidate);
UpdateMatching(Loc, Candidate);
LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
<< Candidate << "\n");
LastMatchedNonAnchors.emplace_back(Loc);

View File

@ -0,0 +1,6 @@
test_backward_matching_probe:5000:0
5: 100 foo:100
7: 111
11: 999
12: 400 bar:400
!CFGChecksum: 999999

View File

@ -0,0 +1,92 @@
; REQUIRES: asserts && x86-registered-target
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-backward-matching.prof --salvage-stale-profile -S --debug-only=sample-profile-impl,sample-profile-matcher 2>&1 | FileCheck %s
; Test that backward matching in matchNonCallsiteLocs correctly overwrites
; forward-matched entries for the second half of non-anchor locations between
; two matched anchors, using pseudo-probe-based profile.
;
; IR probes (ordered by probe ID):
; 1(block), 2(block), 3(call foo), 4(block), 5(block), 6(call bar), 7(block), 8(block)
;
; Profile has foo at probe 5 and bar at probe 12 (checksum mismatch triggers
; stale profile matching).
;
; Anchor matching by LCS:
; foo: IR probe 3 -> Profile probe 5 (delta = +2)
; bar: IR probe 6 -> Profile probe 12 (delta = +6)
;
; Block probes [4, 5] are non-anchors between foo and bar.
; Split evenly:
; First half [probe 4]: forward delta=+2 -> maps to profile probe 6 (no samples)
; Second half [probe 5]: backward delta=+6 -> maps to profile probe 11 (body sample 999)
;
; Without the fix, probe 5 stays at forward value 5+2=7 (body sample 111).
; With the fix (insert_or_assign), backward overwrites to 5+6=11 (body sample
; 999).
; Verify anchor and non-anchor matching:
; CHECK: Callsite with callee:foo is matched from 3 to 5
; CHECK: Location is matched from 4 to 6
; CHECK: Location is matched from 5 to 7
; CHECK: Callsite with callee:bar is matched from 6 to 12
; CHECK: Location is rematched backwards from 5 to 11
; Verify the sample weight at probe 5 uses backward-matched value (999).
; Without the fix, this would show 111.
; CHECK: call void @llvm.pseudoprobe(i64 6355742111584357505, i64 5, i32 0, i64 -1){{.*}}weight: 999
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare i32 @foo(i32)
declare i32 @bar(i32)
; Function under test: eight pseudo probes in ID order, where probes 3 and 6
; are the call sites of foo and bar (the anchors) and the remaining probes are
; block probes emitted via @llvm.pseudoprobe.
define dso_local i32 @test_backward_matching_probe() #0 !dbg !10 {
entry:
; Block probes 1 and 2 come before the first anchor (call foo).
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 1, i32 0, i64 -1), !dbg !14
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 2, i32 0, i64 -1), !dbg !14
; Anchor foo: call-site probe ID 3, carried by discriminator 186646559 on the
; scope of !15.
%r1 = call i32 @foo(i32 1), !dbg !15
; Non-anchor block probes 4 and 5 sit between the two anchors. Probe 4 is the
; first half (stays forward-matched); probe 5 is the second half that the test
; expects to be rematched backwards to profile probe 11.
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 4, i32 0, i64 -1), !dbg !14
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 5, i32 0, i64 -1), !dbg !14
; Anchor bar: call-site probe ID 6, carried by discriminator 186646583 on the
; scope of !17.
%r2 = call i32 @bar(i32 2), !dbg !17
; Block probes 7 and 8 come after the last anchor (call bar).
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 7, i32 0, i64 -1), !dbg !14
call void @llvm.pseudoprobe(i64 6355742111584357505, i64 8, i32 0, i64 -1), !dbg !14
; Use both call results so neither call is trivially dead.
%sum = add i32 %r1, %r2
ret i32 %sum, !dbg !14
}
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
attributes #0 = { nounwind uwtable "use-sample-profile" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7}
!llvm.ident = !{!8}
!llvm.pseudo_probe_desc = !{!13}
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test_probe_bm.c", directory: "/tmp")
!2 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!3 = !{i32 7, !"Dwarf Version", i32 5}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"uwtable", i32 2}
!7 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!8 = !{!"clang version 19.0.0"}
!9 = !DIFile(filename: "test_probe_bm.c", directory: "/tmp")
!10 = distinct !DISubprogram(name: "test_backward_matching_probe", scope: !9, file: !9, line: 10, type: !11, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!11 = !DISubroutineType(types: !12)
!12 = !{}
; GUID=6355742111584357505, CFGChecksum=123456 (mismatches profile's 999999)
!13 = !{i64 6355742111584357505, i64 123456, !"test_backward_matching_probe"}
; Generic debug location for block probes
!14 = !DILocation(line: 11, column: 3, scope: !10)
; call foo discriminator 186646559 encodes probe ID 3, type=direct call
!15 = !DILocation(line: 12, column: 8, scope: !16)
!16 = !DILexicalBlockFile(scope: !10, file: !9, discriminator: 186646559)
; call bar discriminator 186646583 encodes probe ID 6, type=direct call
!17 = !DILocation(line: 14, column: 8, scope: !18)
!18 = !DILexicalBlockFile(scope: !10, file: !9, discriminator: 186646583)