
4e0bd3f improved early MachineLICM's ability to hoist COPY instructions from physical registers out of loops. However, it accidentally broke one of MachineSink's preconditions for sinking cheap instructions (in this case, COPY), which considered such instructions profitable to sink only when at least two of them appear in the same def-use chain within the same basic block. So once early MachineLICM hoists one of them out, MachineSink no longer sinks the remaining cheap instructions, which results in redundant load-immediate instructions in the motivating example we observed on RISC-V. This patch fixes the issue by teaching MachineSink that when there is more than one demand to sink a register into the same block from different critical edges, the sink should be considered profitable, since it increases CSE opportunities. This change also improves two of the AArch64 test cases.
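To make the new profitability rule concrete, here is a minimal, self-contained C++ sketch of the idea described above. This is not the actual MachineSink.cpp code; the names `SinkDemandTracker`, `recordDemand`, and `isProfitableToSink`, as well as the threshold of two demands, are illustrative assumptions that mirror the commit message: once more than one critical edge wants the same register sunk into the same destination block, sinking is treated as profitable because the duplicated cheap instructions become CSE candidates.

```cpp
// Hypothetical sketch only -- NOT the real MachineSink.cpp implementation.
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

// A (virtual register, destination block) pair identifying one sink request
// coming from a critical edge.
using SinkKey = std::pair<uint32_t /*Reg*/, uint32_t /*DestBlock*/>;

class SinkDemandTracker {
  // How many critical edges demand this (register, block) sink.
  std::map<SinkKey, unsigned> Demands;

public:
  // Record that one critical edge wants Reg sunk into DestBlock.
  void recordDemand(uint32_t Reg, uint32_t DestBlock) {
    ++Demands[{Reg, DestBlock}];
  }

  // A cheap instruction (e.g. a COPY or load-immediate) is considered
  // profitable to sink once at least two critical edges demand the same
  // register in the same block: the sunk duplicates can later be CSE'd.
  bool isProfitableToSink(uint32_t Reg, uint32_t DestBlock) const {
    auto It = Demands.find({Reg, DestBlock});
    return It != Demands.end() && It->second >= 2;
  }
};

int main() {
  SinkDemandTracker Tracker;
  // Two critical edges both feed register %5 into block #3.
  Tracker.recordDemand(/*Reg=*/5, /*DestBlock=*/3);
  Tracker.recordDemand(/*Reg=*/5, /*DestBlock=*/3);
  // Only one edge feeds register %7 into block #4.
  Tracker.recordDemand(/*Reg=*/7, /*DestBlock=*/4);

  std::cout << "sink %5 into bb.3: " << Tracker.isProfitableToSink(5, 3) << '\n'; // 1
  std::cout << "sink %7 into bb.4: " << Tracker.isProfitableToSink(7, 4) << '\n'; // 0
  return 0;
}
```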
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s

define i64 @test_or(i32 %a, i32 %b) {
; CHECK-LABEL: test_or:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cbnz w0, LBB0_2
; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %bb1.cond.split
; CHECK-NEXT: cbz w1, LBB0_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
bb1:
  %0 = icmp eq i32 %a, 0
  %1 = icmp eq i32 %b, 0
  %or.cond = or i1 %0, %1
  br i1 %or.cond, label %bb3, label %bb4, !prof !0

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

define i64 @test_or_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cbnz w0, LBB1_2
; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB1_2: ; %bb1.cond.split
; CHECK-NEXT: cbz w1, LBB1_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
bb1:
  %0 = icmp eq i32 %a, 0
  %1 = icmp eq i32 %b, 0
  %or.cond = select i1 %0, i1 true, i1 %1
  br i1 %or.cond, label %bb3, label %bb4, !prof !0

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

define i64 @test_and(i32 %a, i32 %b) {
; CHECK-LABEL: test_and:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cbnz w0, LBB2_2
; CHECK-NEXT: LBB2_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB2_2: ; %bb1.cond.split
; CHECK-NEXT: cbz w1, LBB2_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
bb1:
  %0 = icmp ne i32 %a, 0
  %1 = icmp ne i32 %b, 0
  %or.cond = and i1 %0, %1
  br i1 %or.cond, label %bb4, label %bb3, !prof !1

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

define i64 @test_and_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_and_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cbnz w0, LBB3_2
; CHECK-NEXT: LBB3_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB3_2: ; %bb1.cond.split
; CHECK-NEXT: cbz w1, LBB3_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
bb1:
  %0 = icmp ne i32 %a, 0
  %1 = icmp ne i32 %b, 0
  %or.cond = select i1 %0, i1 %1, i1 false
  br i1 %or.cond, label %bb4, label %bb3, !prof !1

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

; If the branch is unpredictable, don't add another branch.

define i64 @test_or_unpredictable(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_unpredictable:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: cset w9, eq
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: tbnz w8, #0, LBB4_2
; CHECK-NEXT: ; %bb.1: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB4_2: ; %common.ret
; CHECK-NEXT: ret
bb1:
  %0 = icmp eq i32 %a, 0
  %1 = icmp eq i32 %b, 0
  %or.cond = or i1 %0, %1
  br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

define i64 @test_and_unpredictable(i32 %a, i32 %b) {
; CHECK-LABEL: test_and_unpredictable:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: cset w9, ne
; CHECK-NEXT: and w8, w8, w9
; CHECK-NEXT: tbz w8, #0, LBB5_2
; CHECK-NEXT: ; %bb.1: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: bl _bar
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: LBB5_2: ; %common.ret
; CHECK-NEXT: ret
bb1:
  %0 = icmp ne i32 %a, 0
  %1 = icmp ne i32 %b, 0
  %or.cond = and i1 %0, %1
  br i1 %or.cond, label %bb4, label %bb3, !unpredictable !2

bb3:
  ret i64 0

bb4:
  %2 = call i64 @bar()
  ret i64 %2
}

declare i64 @bar()

!0 = !{!"branch_weights", i32 5128, i32 32}
!1 = !{!"branch_weights", i32 1024, i32 4136}
!2 = !{}