Min-Yih Hsu 7e2f96194f
[MachineSink] Fix missing sinks along critical edges (#97618)
4e0bd3f improved early MachineLICM's ability to hoist COPYs from
physical registers out of a loop. However, it accidentally broke one of
MachineSink's preconditions for sinking cheap instructions (in this case,
COPY), which considers those instructions profitable to sink only
when there are at least two of them in the same def-use chain in the
same basic block. So if early MachineLICM hoists one of them out,
MachineSink no longer sinks the rest of the cheap instructions. This results
in redundant load-immediate instructions in the motivating example
we've seen on RISC-V.

This patch fixes this by teaching MachineSink that if there is more than
one demand to sink a register into the same block from different
critical edges, it should be considered profitable as it increases the
CSE opportunities.
This change also improves two of the AArch64's cases.
2024-07-09 10:48:22 -07:00

123 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -cgpp-huge-func=0 -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s
; Zero-initialized i32 globals. The test functions below store 0 to them so
; that each basic block of interest contains a store in addition to the
; and/icmp/br sequence being examined.
@A = dso_local global i32 zeroinitializer
@B = dso_local global i32 zeroinitializer
@C = dso_local global i32 zeroinitializer
; Test that 'and' is sunk into the cmp block to form tbz.
;
; The mask (%a & 4) is computed in the entry block, but its only user is the
; 'icmp' in %bb0. The CodeGenPrepare assertions expect no 'and' before %bb0 and
; an 'and' immediately followed by the 'icmp', the store and the branch inside
; %bb0. The llc assertions expect the and/icmp/br sequence to be emitted as a
; single test-bit branch on bit 2 of w0 (mask 4 == 1 << 2).
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: // %bb.0:
; CHECK-NEXT: tbz w1, #0, .LBB0_3
; CHECK-NEXT: // %bb.1: // %bb0
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbnz w0, #2, .LBB0_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %bb2
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-CGP-LABEL: @and_sink1(
; CHECK-CGP-NOT: and i32
; Single-bit mask, defined one block away from its only use.
%and = and i32 %a, 4
br i1 %c, label %bb0, label %bb2
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
; Only user of %and; its result feeds the conditional branch of this block.
%cmp = icmp eq i32 %and, 0
store i32 0, ptr @A
br i1 %cmp, label %bb1, label %bb2
bb1:
ret i32 1
bb2:
ret i32 0
}
; Test that both 'and' and cmp get sunk to form tbz.
;
; Here the three pieces live in three different blocks: the 'and' is in the
; entry block, the 'icmp' is in %bb0, and the branch that consumes the compare
; is in %bb1. %bb0 and %bb1 form a loop (%bb1 branches back to %bb0 when the
; compare is false). The CodeGenPrepare assertions expect neither an 'and' nor
; an 'icmp' to remain in the entry block or %bb0, and expect an 'and' directly
; followed by the 'icmp', the store and the branch in %bb1. The llc assertions
; expect a test-bit branch on bit 2 of w0 at the end of %bb1.
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbz w1, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %bb0.preheader
; CHECK-NEXT: adrp x8, B
; CHECK-NEXT: adrp x9, C
; CHECK-NEXT: .LBB1_2: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:B]
; CHECK-NEXT: tbz w2, #0, .LBB1_5
; CHECK-NEXT: // %bb.3: // %bb1
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x9, :lo12:C]
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-CGP-LABEL: @and_sink2(
; CHECK-CGP-NOT: and i32
; Single-bit mask, defined two blocks away from the branch that depends on it.
%and = and i32 %a, 4
store i32 0, ptr @A
br i1 %c, label %bb0, label %bb3
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
; CHECK-CGP-NOT: icmp
; The compare sits in the loop header, but its only user is the branch in %bb1.
%cmp = icmp eq i32 %and, 0
store i32 0, ptr @B
br i1 %c2, label %bb1, label %bb3
bb1:
; CHECK-CGP-LABEL: bb1:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
store i32 0, ptr @C
; Exit to %bb2 when the masked bit is clear, otherwise loop back to %bb0.
br i1 %cmp, label %bb2, label %bb0
bb2:
ret i32 1
bb3:
ret i32 0
}
; Test that 'and' is not sunk since cbz is a better alternative.
;
; Negative case: the mask is 3, which covers two bits rather than one
; (presumably why a single test-bit branch is not an option here). The
; CodeGenPrepare assertions require the 'and' to stay on the line right after
; the function header and forbid an 'and' inside %bb0. The llc assertions
; expect the 'and' to be computed once before the loop and the loop to end in
; a compare-with-zero branch on its result.
define dso_local i32 @and_sink3(i32 %a) {
; CHECK-LABEL: and_sink3:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: and w9, w0, #0x3
; CHECK-NEXT: .LBB2_1: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: cbz w9, .LBB2_1
; CHECK-NEXT: // %bb.2: // %bb2
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-CGP-LABEL: @and_sink3(
; CHECK-CGP-NEXT: and i32
; Two-bit mask, defined outside the loop; expected to stay where it is.
%and = and i32 %a, 3
br label %bb0
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
; %bb0 is a self-loop: it repeats while the masked value is zero.
%cmp = icmp eq i32 %and, 0
store i32 0, ptr @A
br i1 %cmp, label %bb0, label %bb2
bb2:
ret i32 0
}