
BlockFrequencyInfo calculates block frequencies as Scaled64 numbers but, as a last step, converts them to unsigned 64-bit integers (`BlockFrequency`). This change improves the factors picked for this conversion so that: * Big numbers close to UINT64_MAX are avoided, so that users do not overflow/saturate when adding multiple frequencies together or when multiplying with integers. This leaves the topmost 10 bits unused to allow for some room. * The difference between the hottest and coldest block is spread as much as possible to increase precision. * If the hot/cold spread cannot be represented, precision is lost at the lower end, but the frequencies at the upper end stay differentiable for hot blocks.
134 lines
3.3 KiB
LLVM
134 lines
3.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
;
; Two functions with the same body but different profile entry counts are
; compiled for x86-64. The expected assembly shows block %dup kept as a single
; shared block in one function and copied into its predecessors in the other.
|
|
|
|
; Cold function, %dup should not be duplicated into predecessors.
;
; Shape: %entry branches to %true1 or %false1, both of which jump to %dup;
; %dup then branches to %true2 or %false2, which join in %exit.
; The function entry count is 10 (!21) and both conditional branches use the
; 1:1 branch weights in !30.
; Expected output: the %dup compare is emitted once (.LBB0_3); %true1 falls
; through into it and %false1 reaches it with a jmp.
define i32 @cold(i32 %a, ptr %p, ptr %q) !prof !21 {
; CHECK-LABEL: cold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB0_2
; CHECK-NEXT:  # %bb.1: # %true1
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    addl $2, %eax
; CHECK-NEXT:  .LBB0_3: # %dup
; CHECK-NEXT:    cmpl $5, %eax
; CHECK-NEXT:    jl .LBB0_5
; CHECK-NEXT:  # %bb.4: # %true2
; CHECK-NEXT:    xorl %edi, %eax
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB0_2: # %false1
; CHECK-NEXT:    movl (%rdx), %eax
; CHECK-NEXT:    addl $-3, %eax
; CHECK-NEXT:    jmp .LBB0_3
; CHECK-NEXT:  .LBB0_5: # %false2
; CHECK-NEXT:    andl %edi, %eax
; CHECK-NEXT:    retq
entry:
  %cond1 = icmp sgt i32 %a, 1
  br i1 %cond1, label %true1, label %false1, !prof !30

true1:
  %v1 = load i32, ptr %p, align 4
  %v2 = add i32 %v1, 2
  br label %dup

false1:
  %v3 = load i32, ptr %q, align 4
  %v4 = sub i32 %v3, 3
  br label %dup

; Block under test: two predecessors, ends in a conditional branch.
dup:
  %v5 = phi i32 [%v2, %true1], [%v4, %false1]
  %cond2 = icmp sgt i32 %v5, 4
  br i1 %cond2, label %true2, label %false2, !prof !30

true2:
  %v6 = xor i32 %v5, %a
  br label %exit

false2:
  %v7 = and i32 %v5, %a
  br label %exit

exit:
  %v8 = phi i32 [%v6, %true2], [%v7, %false2]
  ret i32 %v8
}
|
|
|
|
; Same code as previous function, but with hot profile count.
; So %dup should be duplicated into predecessors.
;
; The IR body matches @cold instruction for instruction; only the attached
; entry count differs (400 via !22 instead of 10 via !21).
; Expected output: there is no separate %dup block. The "cmpl $5, %eax"
; compare appears twice, once after the %true1 code and once after the
; %false1 code, each followed by its own conditional jump.
define i32 @hot(i32 %a, ptr %p, ptr %q) !prof !22 {
; CHECK-LABEL: hot:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB1_2
; CHECK-NEXT:  # %bb.1: # %true1
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    addl $2, %eax
; CHECK-NEXT:    cmpl $5, %eax
; CHECK-NEXT:    jge .LBB1_4
; CHECK-NEXT:  .LBB1_5: # %false2
; CHECK-NEXT:    andl %edi, %eax
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB1_2: # %false1
; CHECK-NEXT:    movl (%rdx), %eax
; CHECK-NEXT:    addl $-3, %eax
; CHECK-NEXT:    cmpl $5, %eax
; CHECK-NEXT:    jl .LBB1_5
; CHECK-NEXT:  .LBB1_4: # %true2
; CHECK-NEXT:    xorl %edi, %eax
; CHECK-NEXT:    retq
entry:
  %cond1 = icmp sgt i32 %a, 1
  br i1 %cond1, label %true1, label %false1, !prof !30

true1:
  %v1 = load i32, ptr %p, align 4
  %v2 = add i32 %v1, 2
  br label %dup

false1:
  %v3 = load i32, ptr %q, align 4
  %v4 = sub i32 %v3, 3
  br label %dup

; Block under test: two predecessors, ends in a conditional branch.
dup:
  %v5 = phi i32 [%v2, %true1], [%v4, %false1]
  %cond2 = icmp sgt i32 %v5, 4
  br i1 %cond2, label %true2, label %false2, !prof !30

true2:
  %v6 = xor i32 %v5, %a
  br label %exit

false2:
  %v7 = and i32 %v5, %a
  br label %exit

exit:
  %v8 = phi i32 [%v6, %true2], [%v7, %false2]
  ret i32 %v8
}
|
|
|
|
|
|
; The module carries a single flag: the profile summary rooted at !1.
!llvm.module.flags = !{!1}

; Function entry counts: !21 is attached to @cold, !22 to @hot.
!21 = !{!"function_entry_count", i64 10}
!22 = !{!"function_entry_count", i64 400}

; Equal weights for both successors; used by every conditional branch in
; @cold and @hot.
!30 = !{!"branch_weights", i32 1, i32 1}

; Profile summary node and its key/value children.
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
; NOTE(review): each DetailedSummary entry is presumably
; (cutoff, minimum count, number of counts) -- confirm against the LLVM
; profile summary documentation before editing these values.
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
|