
Branch weights from sample-based PGO may be inaccurate due to sampling. If the loop body must be executed, then the original loop's back-edge weight must not be less than the exit weight.
296 lines
9.5 KiB
LLVM
296 lines
9.5 KiB
LLVM
; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE
|
|
; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER
|
|
; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR
|
|
|
|
; Global that every test function in this file writes with a volatile store.
@g = global i32 0
|
|
|
|
; We should get the same "count =" results for "outer_loop_body" and
|
|
; "inner_loop_body" before and after the transformation.
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func0
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1
|
|
; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001
|
|
; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000
|
|
; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000
|
|
; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000
|
|
; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000
|
|
; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func0
|
|
; BFI_AFTER: - entry: {{.*}} count = 1
|
|
; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000
|
|
; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000
|
|
; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000
|
|
; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1
|
|
|
|
; IR-LABEL: define void @func0
|
|
; IR: inner_loop_body:
|
|
; IR: br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]]
|
|
; IR: inner_loop_exit:
|
|
; IR: br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]]
|
|
;
|
|
; A function with known loop-bounds where after loop-rotation we end with an
|
|
; unconditional branch in the pre-header.
|
|
define void @func0() !prof !0 {
entry:
  br label %outer_loop_header

; Outer loop header. %i0 starts at 0 and the body is entered while %i0 < 1000.
; !1 carries the weights for (outer_loop_body, outer_loop_exit).
outer_loop_header:
  %i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit]
  %cmp0 = icmp slt i32 %i0, 1000
  br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1

outer_loop_body:
  store volatile i32 %i0, ptr @g, align 4
  br label %inner_loop_header

; Inner loop header. %i1 starts at 0 and the body is entered while %i1 < 3.
; !2 carries the weights for (inner_loop_body, inner_loop_exit).
inner_loop_header:
  %i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body]
  %cmp1 = icmp slt i32 %i1, 3
  br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2

inner_loop_body:
  store volatile i32 %i1, ptr @g, align 4
  %i1_inc = add i32 %i1, 1
  br label %inner_loop_header

; Leaving the inner loop advances the outer induction variable.
inner_loop_exit:
  %i0_inc = add i32 %i0, 1
  br label %outer_loop_header

outer_loop_exit:
  ret void
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func1
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 21504
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 20480
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func1
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1016
|
|
; BFI_AFTER: - loop_body: {{.*}} count = 20480
|
|
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1016
|
|
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func1
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]]
|
|
|
|
; A function with unknown loop-bounds so loop-rotation ends up with a
|
|
; conditional jump in pre-header and loop body. branch_weights show the body is
|
|
; executed more often than header.
|
|
define void @func1(i32 %n) !prof !3 {
entry:
  br label %loop_header

; %i starts at 0; the body is entered while %i < %n, so the trip count is not
; a compile-time constant. !4 carries the weights for (loop_body, loop_exit).
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_body, label %loop_exit, !prof !4

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func2
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 1056
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 32
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func2
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 32
; BFI_AFTER: - loop_body: {{.*}} count = 32
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 32
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func2
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]]
|
|
|
|
; A function with unknown loop-bounds so loop-rotation ends up with a
|
|
; conditional jump in pre-header and loop body. Similar to `func1` but here
|
|
; loop-exit count is higher than backedge count.
|
|
define void @func2(i32 %n) !prof !3 {
entry:
  br label %loop_header

; Note the successor order: the true edge of %cmp leaves the loop and the
; false edge enters the body. !5 carries the weights for
; (loop_exit, loop_body).
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !5

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func3_zero_branch_weight
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 2199023255552
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 2199023254528
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func3_zero_branch_weight
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body: {{.*}} count = 2199023255552
|
|
; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func3_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC3_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC3_0]]
|
|
|
|
; Same loop shape as func2, but the header branch uses !6, whose weight for
; the loop_exit edge is zero and whose weight for the loop_body edge is one.
define void @func3_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

; The true edge of %cmp leaves the loop; the false edge enters the body.
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !6

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; IR-LABEL: define void @func4_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC4_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC4_0]]
|
|
|
|
; Same loop shape as func2, but the header branch uses !7, whose weight for
; the loop_exit edge is one and whose weight for the loop_body edge is zero.
define void @func4_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

; The true edge of %cmp leaves the loop; the false edge enters the body.
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !7

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; IR-LABEL: define void @func5_zero_branch_weight
|
|
; IR: entry:
|
|
; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]]
|
|
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC5_0]]
|
|
|
|
; Same loop shape as func2, but the header branch uses !8, in which both the
; loop_exit weight and the loop_body weight are zero.
define void @func5_zero_branch_weight(i32 %n) !prof !3 {
entry:
  br label %loop_header

; The true edge of %cmp leaves the loop; the false edge enters the body.
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_exit, label %loop_body, !prof !8

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; BFI_BEFORE-LABEL: block-frequency-info: func6_inaccurate_branch_weight
|
|
; BFI_BEFORE: - entry: {{.*}} count = 1024
|
|
; BFI_BEFORE: - loop_header: {{.*}} count = 2047
|
|
; BFI_BEFORE: - loop_body: {{.*}} count = 1023
|
|
; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
|
|
|
|
; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight
|
|
; BFI_AFTER: - entry: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_body: {{.*}} count = 1024
|
|
; BFI_AFTER: - loop_exit: {{.*}} count = 1024
|
|
|
|
; IR-LABEL: define void @func6_inaccurate_branch_weight(
|
|
; IR: entry:
|
|
; IR: br label %loop_body
|
|
; IR: loop_body:
|
|
; IR: br i1 %cmp, label %loop_body, label %loop_exit, !prof [[PROF_FUNC6_0:![0-9]+]]
|
|
; IR: loop_exit:
|
|
; IR: ret void
|
|
|
|
; Branch weight from sample-based PGO may be inaccurate due to sampling.
|
|
; The count for loop_body in the following case should not be less than loop_exit.
|
|
; However this may not hold for Sample-based PGO.
|
|
define void @func6_inaccurate_branch_weight() !prof !3 {
entry:
  br label %loop_header

; %i starts at 0 and is compared against the constant 2, so the first
; iteration always enters the body. !9 nevertheless gives the loop_body edge
; a smaller weight (1023) than the loop_exit edge (1024).
loop_header:
  %i = phi i32 [0, %entry], [%i_inc, %loop_body]
  %cmp = icmp slt i32 %i, 2
  br i1 %cmp, label %loop_body, label %loop_exit, !prof !9

loop_body:
  store volatile i32 %i, ptr @g, align 4
  %i_inc = add i32 %i, 1
  br label %loop_header

loop_exit:
  ret void
}
|
|
|
|
; Profile metadata attached to the functions and header branches in this file.
; Entry count of func0.
!0 = !{!"function_entry_count", i64 1}
; func0 outer header branch, in successor order (outer_loop_body, outer_loop_exit).
!1 = !{!"branch_weights", i32 1000, i32 1}
; func0 inner header branch, in successor order (inner_loop_body, inner_loop_exit).
!2 = !{!"branch_weights", i32 3000, i32 1000}
; Entry count shared by func1 through func6.
!3 = !{!"function_entry_count", i64 1024}
; func1 header branch, in successor order (loop_body, loop_exit).
!4 = !{!"branch_weights", i32 40, i32 2}
; func2 header branch, in successor order (loop_exit, loop_body).
!5 = !{!"branch_weights", i32 10240, i32 320}
; func3 header branch, in successor order (loop_exit, loop_body).
!6 = !{!"branch_weights", i32 0, i32 1}
; func4 header branch, in successor order (loop_exit, loop_body).
!7 = !{!"branch_weights", i32 1, i32 0}
; func5 header branch, in successor order (loop_exit, loop_body).
!8 = !{!"branch_weights", i32 0, i32 0}
; func6 header branch, in successor order (loop_body, loop_exit).
!9 = !{!"branch_weights", i32 1023, i32 1024}
|
|
|
|
; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000}
|
|
; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1}
|
|
; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1}
|
|
; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127}
|
|
; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320}
|
|
; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0}
|
|
; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
|
|
; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
|
|
; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
|
|
; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 0, i32 1024}
|