
`TotalRootEntryCount` captures how many times that root was entered, regardless of whether a profile was also collected (profile collection for a given root happens on only one thread at a time). We don't scale the counters by it in compiler-rt because the goal there is to flush out the data as fast as possible, so traversing and multiplying the counter vectors is left to the profile user. We really only need to do this when flattening the profile, so that the values across roots and flat profiles match. We could do it earlier, too - for example, when loading the profile - but it seems beneficial (at least for debugging) to keep the counter values the same as the loaded ones. We can revisit this later.
132 lines
5.1 KiB
LLVM
132 lines
5.1 KiB
LLVM
; REQUIRES: x86_64-linux
|
|
; RUN: rm -rf %t
|
|
; RUN: split-file %s %t
|
|
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
|
|
|
|
; RUN: opt -passes='module-inline,print<ctx-prof-analysis>' -ctx-profile-printer-level=everything %t/1000.ll -S \
|
|
; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=yaml \
|
|
; RUN: -o - 2> %t/profile-final.yaml | FileCheck %s
|
|
; RUN: diff %t/profile-final.yaml %t/expected.yaml
|
|
|
|
; There are 2 calls to @a from @entrypoint. We only inline the one callsite
|
|
; marked as alwaysinline, the rest are blocked (marked noinline). After the inline,
|
|
; the updated contextual profile should still have the same tree for the non-inlined case.
|
|
; For the inlined case, we should observe, for the @entrypoint context:
|
|
; - an empty callsite where the inlined one was (first one, i.e. 0)
|
|
; - more counters appended to the old counter list (because we ingested the
|
|
; ones from @a). The values are copied.
|
|
; - a new callsite to @b
|
|
; CHECK-LABEL: @entrypoint
|
|
; CHECK-LABEL: yes:
|
|
; CHECK: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1)
|
|
; CHECK-NEXT: br label %loop.i
|
|
; CHECK-LABEL: loop.i:
|
|
; CHECK-NEXT: %indvar.i = phi i32 [ %indvar.next.i, %loop.i ], [ 0, %yes ]
|
|
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 2, i32 3)
|
|
; CHECK-NEXT: %b.i = add i32 %x, %indvar.i
|
|
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 1, i32 2, ptr @b)
|
|
; CHECK-NEXT: %call3.i = call i32 @b() #1
|
|
; CHECK-LABEL: no:
|
|
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2)
|
|
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a)
|
|
; CHECK-NEXT: %call2 = call i32 @a(i32 %x) #1
|
|
; CHECK-NEXT: br label %exit
|
|
|
|
; Make sure the postlink thinlto pipeline is aware of ctxprof
|
|
; RUN: opt -passes='thinlto<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
|
|
; RUN: %t/1000.ll -S -o - | FileCheck %s --check-prefix=PIPELINE
|
|
|
|
; PIPELINE-LABEL: define i32 @entrypoint
|
|
; PIPELINE-SAME: !prof ![[ENTRYPOINT_COUNT:[0-9]+]]
|
|
; PIPELINE-LABEL: loop.i:
|
|
; PIPELINE: br i1 %cond.i, label %loop.i, label %exit, !prof ![[LOOP_BW_INL:[0-9]+]]
|
|
; PIPELINE-LABEL: define i32 @a
|
|
; PIPELINE-LABEL: loop:
|
|
; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]
|
|
|
|
; *Note* that all values are multiplied by the TotalRootEntryCount, which is 24
|
|
;
|
|
; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240}
|
|
; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop)
|
|
; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48}
|
|
; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop)
|
|
; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192}
|
|
|
|
;--- 1000.ll
; Root of the contextual profile (!guid !0, i.e. 1000).
; It owns three counters (third operand of instrprof.increment; the last
; operand is the counter index) and two callsites, both targeting @a (third
; operand of instrprof.callsite; the fourth operand is the callsite index):
;   - callsite 0, in %yes, is marked alwaysinline;
;   - callsite 1, in %no, is marked noinline.
; Value and block names are matched verbatim by the expectations at the top of
; this file, so they must not be renamed.
define i32 @entrypoint(i32 %x) !guid !0 {
  ; Counter 0: function entry.
  call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 0)
  %t = icmp eq i32 %x, 0
  br i1 %t, label %yes, label %no
yes:
  ; Counter 1: the %x == 0 path.
  call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 1)
  ; Callsite 0: the call to @a that is requested to be inlined.
  call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 0, ptr @a)
  %call1 = call i32 @a(i32 %x) alwaysinline
  br label %exit
no:
  ; Counter 2: the %x != 0 path.
  call void @llvm.instrprof.increment(ptr @entrypoint, i64 0, i32 3, i32 2)
  ; Callsite 1: the call to @a that must stay out of line.
  call void @llvm.instrprof.callsite(ptr @entrypoint, i64 0, i32 2, i32 1, ptr @a)
  %call2 = call i32 @a(i32 %x) noinline
  br label %exit
exit:
  %ret = phi i32 [%call1, %yes], [%call2, %no]
  ret i32 %ret
}

; Callee of both callsites in @entrypoint (!guid !1, i.e. 1001).
; It owns two counters (function entry and loop body) and a single callsite,
; which targets @b.
; Value and block names are significant: the inlined copy of this body inside
; @entrypoint is matched using these names with an ".i" suffix, so they must
; not be renamed. %b is otherwise unused.
define i32 @a(i32 %x) !guid !1 {
entry:
  ; Counter 0: function entry.
  call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 0)
  br label %loop
loop:
  %indvar = phi i32 [%indvar.next, %loop], [0, %entry]
  ; Counter 1: incremented once per loop iteration.
  call void @llvm.instrprof.increment(ptr @a, i64 0, i32 2, i32 1)
  %b = add i32 %x, %indvar
  ; Callsite 0 (the only one in @a): the call to @b, kept out of line.
  call void @llvm.instrprof.callsite(ptr @a, i64 0, i32 1, i32 0, ptr @b)
  %call3 = call i32 @b() noinline
  ; Advance by the value returned from @b; keep looping while the signed
  ; comparison %indvar.next < %x holds.
  %indvar.next = add i32 %indvar, %call3
  %cond = icmp slt i32 %indvar.next, %x
  br i1 %cond, label %loop, label %exit
exit:
  ret i32 8
}

; Leaf function (!guid !2, i.e. 1002): a single counter, no callsites.
; Always returns 1.
define i32 @b() !guid !2 {
  ; Counter 0: function entry.
  call void @llvm.instrprof.increment(ptr @b, i64 0, i32 1, i32 0)
  ret i32 1
}

; GUIDs attached to the functions above through !guid. These are the Guid
; values the profile sections of this file refer to.
!0 = !{i64 1000} ; @entrypoint
!1 = !{i64 1001} ; @a
!2 = !{i64 1002} ; @b
;--- profile.yaml
# Input contextual profile. One root (Guid 1000 = @entrypoint) with two
# callsites; each callsite entry is a list of callee contexts.
#   callsite 0 -> @a (Guid 1001), counters [2, 100]  (the inlined call)
#   callsite 1 -> @a (Guid 1001), counters [8, 500]  (the call left out of line)
# Under each @a context, its single callsite leads to @b (Guid 1002).
Contexts:
  - Guid: 1000
    TotalRootEntryCount: 24
    Counters: [10, 2, 8]
    Callsites:  -
                  - Guid: 1001
                    Counters: [2, 100]
                    Callsites:  -
                                  - Guid: 1002
                                    Counters: [100]
                -
                  - Guid: 1001
                    Counters: [8, 500]
                    Callsites:  -
                                  - Guid: 1002
                                    Counters: [500]
;--- expected.yaml
|
|
|
|
Contexts:
|
|
- Guid: 1000
|
|
TotalRootEntryCount: 24
|
|
Counters: [ 10, 2, 8, 100 ]
|
|
Callsites:
|
|
- [ ]
|
|
- - Guid: 1001
|
|
Counters: [ 8, 500 ]
|
|
Callsites:
|
|
- - Guid: 1002
|
|
Counters: [ 500 ]
|
|
- - Guid: 1002
|
|
Counters: [ 100 ]
|