
BlockFrequencyInfo calculates block frequencies as Scaled64 numbers but as a last step converts them to unsigned 64bit integers (`BlockFrequency`). This improves the factors picked for this conversion so that: * Avoid big numbers close to UINT64_MAX to avoid users overflowing/saturating when adding multiply frequencies together or when multiplying with integers. This leaves the topmost 10 bits unused to allow for some room. * Spread the difference between hottest/coldest block as much as possible to increase precision. * If the hot/cold spread cannot be represented loose precision at the lower end, but keep the frequencies at the upper end for hot blocks differentiable.
254 lines
8.2 KiB
LLVM
254 lines
8.2 KiB
LLVM
; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE
|
|
; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER
|
|
; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR
|
|
|
|
@g = global i32 0
|
|
|
|
; We should get the same "count =" results for "outer_loop_body" and
|
|
; "inner_loop_body" before and after the transformation.
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func0
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1
|
|
; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001
|
|
; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000
|
|
; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000
|
|
; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000
|
|
; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000
|
|
; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func0
|
|
; BFI_AFTER: - entry: {{.*}} count = 1
|
|
; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000
|
|
; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000
|
|
; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000
|
|
; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1
|
|
|
|
; IR-LABEL: define void @func0
|
|
; IR: inner_loop_body:
|
|
; IR: br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]]
|
|
; IR: inner_loop_exit:
|
|
; IR: br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]]
|
|
;
|
|
; A function with known loop-bounds where after loop-rotation we end with an
|
|
; unconditional branch in the pre-header.
|
|
define void @func0() !prof !0 {
|
|
entry:
|
|
br label %outer_loop_header
|
|
|
|
outer_loop_header:
|
|
%i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit]
|
|
%cmp0 = icmp slt i32 %i0, 1000
|
|
br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1
|
|
|
|
outer_loop_body:
|
|
store volatile i32 %i0, ptr @g, align 4
|
|
br label %inner_loop_header
|
|
|
|
inner_loop_header:
|
|
%i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body]
|
|
%cmp1 = icmp slt i32 %i1, 3
|
|
br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2
|
|
|
|
inner_loop_body:
|
|
store volatile i32 %i1, ptr @g, align 4
|
|
%i1_inc = add i32 %i1, 1
|
|
br label %inner_loop_header
|
|
|
|
inner_loop_exit:
|
|
%i0_inc = add i32 %i0, 1
|
|
br label %outer_loop_header
|
|
|
|
outer_loop_exit:
|
|
ret void
|
|
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func1
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 21504
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 20480
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func1
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1016
|
|
; BFI_AFTER: - loop_body: {{.*}} count = 20480
|
|
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1016
|
|
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func1
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]]
|
|
|
|
; A function with unknown loop-bounds so loop-rotation ends up with a
|
|
; condition jump in pre-header and loop body. branch_weight shows body is
|
|
; executed more often than header.
|
|
define void @func1(i32 %n) !prof !3 {
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header:
|
|
%i = phi i32 [0, %entry], [%i_inc, %loop_body]
|
|
%cmp = icmp slt i32 %i, %n
|
|
br i1 %cmp, label %loop_body, label %loop_exit, !prof !4
|
|
|
|
loop_body:
|
|
store volatile i32 %i, ptr @g, align 4
|
|
%i_inc = add i32 %i, 1
|
|
br label %loop_header
|
|
|
|
loop_exit:
|
|
ret void
|
|
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func2
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 1056
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 32
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func2
|
|
; - entry: {{.*}} count = 1024
|
|
; - loop_body.lr.ph: {{.*}} count = 32
|
|
; - loop_body: {{.*}} count = 32
|
|
; - loop_header.loop_exit_crit_edge: {{.*}} count = 32
|
|
; - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func2
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]]
|
|
|
|
; A function with unknown loop-bounds so loop-rotation ends up with a
|
|
; condition jump in pre-header and loop body. Similar to `func1` but here
|
|
; loop-exit count is higher than backedge count.
|
|
define void @func2(i32 %n) !prof !3 {
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header:
|
|
%i = phi i32 [0, %entry], [%i_inc, %loop_body]
|
|
%cmp = icmp slt i32 %i, %n
|
|
br i1 %cmp, label %loop_exit, label %loop_body, !prof !5
|
|
|
|
loop_body:
|
|
store volatile i32 %i, ptr @g, align 4
|
|
%i_inc = add i32 %i, 1
|
|
br label %loop_header
|
|
|
|
loop_exit:
|
|
ret void
|
|
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func3_zero_branch_weight
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 2199023255552
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 2199023254528
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func3_zero_branch_weight
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body: {{.*}} count = 2199023255552
|
|
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func3_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC3_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC3_0]]
|
|
|
|
define void @func3_zero_branch_weight(i32 %n) !prof !3 {
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header:
|
|
%i = phi i32 [0, %entry], [%i_inc, %loop_body]
|
|
%cmp = icmp slt i32 %i, %n
|
|
br i1 %cmp, label %loop_exit, label %loop_body, !prof !6
|
|
|
|
loop_body:
|
|
store volatile i32 %i, ptr @g, align 4
|
|
%i_inc = add i32 %i, 1
|
|
br label %loop_header
|
|
|
|
loop_exit:
|
|
ret void
|
|
}
|
|
|
|
; IR-LABEL: define void @func4_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC4_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC4_0]]
|
|
|
|
define void @func4_zero_branch_weight(i32 %n) !prof !3 {
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header:
|
|
%i = phi i32 [0, %entry], [%i_inc, %loop_body]
|
|
%cmp = icmp slt i32 %i, %n
|
|
br i1 %cmp, label %loop_exit, label %loop_body, !prof !7
|
|
|
|
loop_body:
|
|
store volatile i32 %i, ptr @g, align 4
|
|
%i_inc = add i32 %i, 1
|
|
br label %loop_header
|
|
|
|
loop_exit:
|
|
ret void
|
|
}
|
|
|
|
; IR-LABEL: define void @func5_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC5_0]]
|
|
|
|
define void @func5_zero_branch_weight(i32 %n) !prof !3 {
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header:
|
|
%i = phi i32 [0, %entry], [%i_inc, %loop_body]
|
|
%cmp = icmp slt i32 %i, %n
|
|
br i1 %cmp, label %loop_exit, label %loop_body, !prof !8
|
|
|
|
loop_body:
|
|
store volatile i32 %i, ptr @g, align 4
|
|
%i_inc = add i32 %i, 1
|
|
br label %loop_header
|
|
|
|
loop_exit:
|
|
ret void
|
|
}
|
|
|
|
!0 = !{!"function_entry_count", i64 1}
|
|
!1 = !{!"branch_weights", i32 1000, i32 1}
|
|
!2 = !{!"branch_weights", i32 3000, i32 1000}
|
|
!3 = !{!"function_entry_count", i64 1024}
|
|
!4 = !{!"branch_weights", i32 40, i32 2}
|
|
!5 = !{!"branch_weights", i32 10240, i32 320}
|
|
!6 = !{!"branch_weights", i32 0, i32 1}
|
|
!7 = !{!"branch_weights", i32 1, i32 0}
|
|
!8 = !{!"branch_weights", i32 0, i32 0}
|
|
|
|
; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000}
|
|
; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1}
|
|
; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1}
|
|
; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127}
|
|
; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320}
|
|
; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0}
|
|
; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
|
|
; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
|
|
; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
|