
4e0bd3f improved early MachineLICM's ability to hoist COPYs from physical registers out of a loop. However, it accidentally broke one of MachineSink's preconditions for sinking cheap instructions (in this case, COPY): MachineSink considers such instructions profitable to sink only when there are at least two of them in the same def-use chain in the same basic block. So if early MachineLICM hoists one of them out, MachineSink no longer sinks the rest of the cheap instructions. This results in redundant load-immediate instructions, as seen in the motivating example on RISC-V. This patch fixes the problem by teaching MachineSink that if there is more than one demand to sink a register into the same block from different critical edges, sinking should be considered profitable because it increases CSE opportunities. This change also improves two of the AArch64 test cases.
123 lines
3.5 KiB
LLVM
123 lines
3.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
;
; Three invocations drive this test:
;   1. llc for aarch64-linux-gnu with -verify-machineinstrs; the emitted
;      assembly is matched by the default-prefix assertions.
;   2. opt running CodeGenPrepare on each function after requiring the
;      profile-summary analysis; the resulting IR is matched by the
;      CGP-prefixed assertions.
;   3. The same opt pipeline with -cgpp-huge-func=0 added; it must produce IR
;      that satisfies the very same CGP-prefixed assertions.
;      NOTE(review): presumably this forces CodeGenPrepare's huge-function
;      handling for every function -- confirm against the pass.
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -cgpp-huge-func=0 -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s

; Zero-initialized i32 globals; they are the targets of the 'store i32 0'
; instructions in the functions below.
@A = dso_local global i32 zeroinitializer
@B = dso_local global i32 zeroinitializer
@C = dso_local global i32 zeroinitializer

; Test that 'and' is sunk into the cmp block to form tbz.
;
; %and is computed in the entry block, but its only user is the compare in
; %bb0. CodeGenPrepare is expected to leave no 'and' in the entry block and to
; place one in %bb0 directly ahead of the compare, the store and the branch.
; llc is then expected to emit a single test-bit branch on bit 2 of w0
; (the mask is 4 == 1 << 2) instead of a separate and/compare pair.
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: // %bb.0:
; CHECK-NEXT: tbz w1, #0, .LBB0_3
; CHECK-NEXT: // %bb.1: // %bb0
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbnz w0, #2, .LBB0_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %bb2
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

; CHECK-CGP-LABEL: @and_sink1(
; CHECK-CGP-NOT: and i32
  ; Single-bit mask, defined one block away from its only user.
  %and = and i32 %a, 4
  br i1 %c, label %bb0, label %bb2
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
  %cmp = icmp eq i32 %and, 0
  store i32 0, ptr @A
  br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 0
}

; Test that both 'and' and cmp get sunk to form tbz.
;
; The chain is spread over three blocks: %and lives in the entry block, %cmp
; in %bb0, and the conditional branch that consumes %cmp in %bb1 (whose false
; edge loops back to %bb0). CodeGenPrepare is expected to leave neither an
; 'and' in the entry block nor an 'and'/'icmp' in %bb0, and to have both in
; %bb1 followed by the store and the branch. llc is then expected to use a
; test-bit branch on bit 2 of w0 (the mask is 4 == 1 << 2) as the loop
; back-edge.
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbz w1, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %bb0.preheader
; CHECK-NEXT: adrp x8, B
; CHECK-NEXT: adrp x9, C
; CHECK-NEXT: .LBB1_2: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:B]
; CHECK-NEXT: tbz w2, #0, .LBB1_5
; CHECK-NEXT: // %bb.3: // %bb1
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x9, :lo12:C]
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

; CHECK-CGP-LABEL: @and_sink2(
; CHECK-CGP-NOT: and i32
  ; Single-bit mask, defined two blocks away from the branch that needs it.
  %and = and i32 %a, 4
  store i32 0, ptr @A
  br i1 %c, label %bb0, label %bb3
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
; CHECK-CGP-NOT: icmp
  ; The compare is used only by the branch at the end of %bb1.
  %cmp = icmp eq i32 %and, 0
  store i32 0, ptr @B
  br i1 %c2, label %bb1, label %bb3
bb1:
; CHECK-CGP-LABEL: bb1:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
  store i32 0, ptr @C
  br i1 %cmp, label %bb2, label %bb0
bb2:
  ret i32 1
bb3:
  ret i32 0
}

; Test that 'and' is not sunk since cbz is a better alternative.
;
; Here the mask is 3, i.e. more than one bit, so the compare against zero
; cannot be expressed as a single test-bit branch. CodeGenPrepare is expected
; to keep the 'and' as the first instruction of the function and to leave no
; 'and' in the loop block %bb0. llc is expected to compute the masked value
; once ahead of the loop and branch on it with cbz inside the loop.
define dso_local i32 @and_sink3(i32 %a) {
; CHECK-LABEL: and_sink3:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: and w9, w0, #0x3
; CHECK-NEXT: .LBB2_1: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: cbz w9, .LBB2_1
; CHECK-NEXT: // %bb.2: // %bb2
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

; CHECK-CGP-LABEL: @and_sink3(
; CHECK-CGP-NEXT: and i32
  ; Two-bit mask; must stay outside the self-looping block %bb0.
  %and = and i32 %a, 3
  br label %bb0
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
  %cmp = icmp eq i32 %and, 0
  store i32 0, ptr @A
  br i1 %cmp, label %bb0, label %bb2
bb2:
  ret i32 0
}