Mircea Trofin 93b74f7178
[ctxprof] Scale up everything under a root by its TotalRootEntryCount (#136015)
`TotalRootEntryCount` captures how many times that root was entered, regardless of whether a profile was also collected (profile collection for a given root happens on only one thread at a time).

We don't do this in compiler_rt because the goal there is to flush out the data as fast as possible, so traversing and multiplying vectors is punted to the profile user.

We really just need to do this when flattening the profile so that the values across roots and flat profiles match. We could do it earlier, too - like when loading the profile - but it seems beneficial (at least for debugging) to keep the counter values the same as the loaded ones. We can revisit this later.
2025-04-21 08:43:21 -07:00

132 lines
5.1 KiB
LLVM

; REQUIRES: x86_64-linux
; RUN: rm -rf %t
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
; RUN: opt -passes='module-inline,print<ctx-prof-analysis>' -ctx-profile-printer-level=everything %t/1000.ll -S \
; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=yaml \
; RUN: -o - 2> %t/profile-final.yaml | FileCheck %s
; RUN: diff %t/profile-final.yaml %t/expected.yaml
; There are 2 calls to @a from @entrypoint. We only inline the callsite marked
; alwaysinline; the other one is blocked (marked noinline). After the inline,
; the updated contextual profile should still have the same tree for the non-inlined case.
; For the inlined case, we should observe, for the @entrypoint context:
; - an empty callsite where the inlined one was (first one, i.e. 0)
; - more counters appended to the old counter list (because we ingested the
; ones from @a). The values are copied.
; - a new callsite to @b
; CHECK-LABEL: @entrypoint
; CHECK-LABEL: yes:
; CHECK: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1)
; CHECK-NEXT: br label %loop.i
; CHECK-LABEL: loop.i:
; CHECK-NEXT: %indvar.i = phi i32 [ %indvar.next.i, %loop.i ], [ 0, %yes ]
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 2, i32 3)
; CHECK-NEXT: %b.i = add i32 %x, %indvar.i
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 1, i32 2, ptr @b)
; CHECK-NEXT: %call3.i = call i32 @b() #1
; CHECK-LABEL: no:
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2)
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a)
; CHECK-NEXT: %call2 = call i32 @a(i32 %x) #1
; CHECK-NEXT: br label %exit
; Make sure the postlink thinlto pipeline is aware of ctxprof
; RUN: opt -passes='thinlto<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
; RUN: %t/1000.ll -S -o - | FileCheck %s --check-prefix=PIPELINE
; PIPELINE-LABEL: define i32 @entrypoint
; PIPELINE-SAME: !prof ![[ENTRYPOINT_COUNT:[0-9]+]]
; PIPELINE-LABEL: loop.i:
; PIPELINE: br i1 %cond.i, label %loop.i, label %exit, !prof ![[LOOP_BW_INL:[0-9]+]]
; PIPELINE-LABEL: define i32 @a
; PIPELINE-LABEL: loop:
; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]
; *Note* that all values are multiplied by the TotalRootEntryCount, which is 24
;
; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240}
; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop); scaled by 24 they are 48 and 2400, and 2400 - 48 = 2352.
; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48}
; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop); scaled by 24 they are 192 and 12000, and 12000 - 192 = 11808.
; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192}
;--- 1000.ll
; Root function of this test. !guid !0 gives it GUID 1000, the same GUID as the
; root context (the one carrying TotalRootEntryCount) in profile.yaml.
; It compares %x with 0 and calls @a on either arm: the call on the %yes arm is
; marked alwaysinline, the call on the %no arm is marked noinline. The result of
; whichever call ran is returned through the phi in %exit.
;
; Intrinsic operands, per the LLVM Language Reference:
;   llvm.instrprof.increment(name, hash, number of counters, counter index)
;   llvm.instrprof.callsite(name, hash, number of callsites, callsite index, callee)
; so this function declares 3 counters and 2 callsites.
;
; NOTE: label names, value names and instruction order in this body are matched
; verbatim by the FileCheck directives at the top of the file; keep them stable.
define i32 @entrypoint(i32 %x) !guid !0 {
; Counter 0 is bumped unconditionally on function entry.
call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 0)
%t = icmp eq i32 %x, 0
br i1 %t, label %yes, label %no
yes:
; Counter 1 covers the %yes arm; callsite 0 is the alwaysinline call to @a.
call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1)
call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 0, ptr @a)
%call1 = call i32 @a(i32 %x) alwaysinline
br label %exit
no:
; Counter 2 covers the %no arm; callsite 1 is the noinline call to @a.
call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2)
call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a)
%call2 = call i32 @a(i32 %x) noinline
br label %exit
exit:
; Merge the two call results; exactly one of them was computed.
%ret = phi i32 [%call1, %yes], [%call2, %no]
ret i32 %ret
}
; Callee that @entrypoint calls from both of its arms. !guid !1 gives it GUID 1001.
; It declares 2 counters (index 0 in %entry, index 1 in %loop) and 1 callsite
; (index 0, the noinline call to @b inside the loop).
; Each iteration adds @b's return value to %indvar and loops again while the
; updated value is signed-less-than %x. On exit the function returns the
; constant 8.
;
; NOTE: the value names here (%indvar, %b, %call3, %cond) are matched by the
; FileCheck directives at the top of the file, with an ".i" suffix in the
; expected inlined copy; keep them stable.
define i32 @a(i32 %x) !guid !1 {
entry:
; Counter 0: function entry.
call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 0)
br label %loop
loop:
%indvar = phi i32 [%indvar.next, %loop], [0, %entry]
; Counter 1: bumped once per loop iteration.
call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 1)
; %b has no users in this function; it is still named in the expected output.
%b = add i32 %x, %indvar
call void @llvm.instrprof.callsite(ptr @a, i64 0, i32 1, i32 0, ptr @b)
%call3 = call i32 @b() noinline
; The step of the induction variable is whatever @b returned.
%indvar.next = add i32 %indvar, %call3
%cond = icmp slt i32 %indvar.next, %x
br i1 %cond, label %loop, label %exit
exit:
ret i32 8
}
; Leaf function called from the loop in @a. !guid !2 gives it GUID 1002.
; It declares a single counter (index 0), bumps it, and returns the constant 1.
define i32 @b() !guid !2 {
call void @llvm.instrprof.increment(ptr @b, i64 0, i32 1, i32 0)
ret i32 1
}
!0 = !{i64 1000}
!1 = !{i64 1001}
!2 = !{i64 1002}
;--- profile.yaml
Contexts:
- Guid: 1000
TotalRootEntryCount: 24
Counters: [10, 2, 8]
Callsites: -
- Guid: 1001
Counters: [2, 100]
Callsites: -
- Guid: 1002
Counters: [100]
-
- Guid: 1001
Counters: [8, 500]
Callsites: -
- Guid: 1002
Counters: [500]
;--- expected.yaml
Contexts:
- Guid: 1000
TotalRootEntryCount: 24
Counters: [ 10, 2, 8, 100 ]
Callsites:
- [ ]
- - Guid: 1001
Counters: [ 8, 500 ]
Callsites:
- - Guid: 1002
Counters: [ 500 ]
- - Guid: 1002
Counters: [ 100 ]