[AArch64] Allow peephole to optimize AND + signed compare with 0 (#153608)

This should be the peephole's job. Because ANDS always sets the V flag to 0,
signed comparisons with 0 can safely be replaced with TST. Note that this
applies only to AArch64, because ANDS on ARM leaves the V flag unchanged.

Fixes: https://github.com/llvm/llvm-project/issues/154387
This commit is contained in:
AZero13 2025-11-10 17:32:31 -05:00 committed by GitHub
parent d5125b3089
commit 7b12a08f5e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 434 additions and 1 deletions

View File

@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
case AArch64::ANDSWri:
case AArch64::ANDSWrr:
case AArch64::ANDSWrs:
case AArch64::ANDSXri:
case AArch64::ANDSXrr:
case AArch64::ANDSXrs:
case AArch64::BICSWrr:
case AArch64::BICSXrr:
case AArch64::BICSWrs:
case AArch64::BICSXrs:
return Instr.getOpcode();
case AArch64::ADDWrr:
@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) {
return AArch64::ANDSWri;
case AArch64::ANDXri:
return AArch64::ANDSXri;
case AArch64::ANDWrr:
return AArch64::ANDSWrr;
case AArch64::ANDWrs:
return AArch64::ANDSWrs;
case AArch64::ANDXrr:
return AArch64::ANDSXrr;
case AArch64::ANDXrs:
return AArch64::ANDSXrs;
case AArch64::BICWrr:
return AArch64::BICSWrr;
case AArch64::BICXrr:
return AArch64::BICSXrr;
case AArch64::BICWrs:
return AArch64::BICSWrs;
case AArch64::BICXrs:
return AArch64::BICSXrs;
}
}
@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) {
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
/// Returns true when \p MI, after being mapped through sForm(), is one of the
/// flag-setting ANDS or BICS opcodes (32- or 64-bit; immediate, register or
/// shifted-register AND, register or shifted-register BIC).
static bool isANDOpcode(MachineInstr &MI) {
  const unsigned FlagSettingOpc = sForm(MI);

  // Flag-setting bitwise AND variants.
  const bool IsANDS = FlagSettingOpc == AArch64::ANDSWri ||
                      FlagSettingOpc == AArch64::ANDSWrr ||
                      FlagSettingOpc == AArch64::ANDSWrs ||
                      FlagSettingOpc == AArch64::ANDSXri ||
                      FlagSettingOpc == AArch64::ANDSXrr ||
                      FlagSettingOpc == AArch64::ANDSXrs;

  // Flag-setting bit-clear (AND with inverted operand) variants.
  const bool IsBICS = FlagSettingOpc == AArch64::BICSWrr ||
                      FlagSettingOpc == AArch64::BICSXrr ||
                      FlagSettingOpc == AArch64::BICSWrs ||
                      FlagSettingOpc == AArch64::BICSXrs;

  return IsANDS || IsBICS;
}
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
// 1) MI and CmpInstr set N and V to the same value.
// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
// signed overflow occurs, so CmpInstr could still be simplified away.
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
// Note that Ands and Bics instructions always clear the V flag.
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
return false;
AccessKind AccessToCheck = AK_Write;

View File

@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s | FileCheck %s
#
# Checks that peephole-opt removes a compare against zero (SUBSWri ..., 0, 0)
# that follows an ANDWri by turning the ANDWri into the flag-setting ANDSWri,
# even though the flags feed a Bcc with condition code 12 (presumably the
# signed GT condition -- confirm against the AArch64 condition-code enum).
#
# NOTE(review): the ANDWri below reads %1, the very register it defines, and
# %0 is never used. Presumably `killed %0` was intended; confirm before
# changing it, because the autogenerated CHECK lines encode the current
# operands.
--- |
define i32 @test01() nounwind {
entry:
%0 = select i1 true, i32 1, i32 0
%1 = and i32 %0, 65535
%2 = icmp sgt i32 %1, 0
br i1 %2, label %if.then, label %if.end
if.then: ; preds = %entry
ret i32 1
if.end: ; preds = %entry
ret i32 0
}
...
---
name: test01
registers:
- { id: 0, class: gpr32 }
- { id: 1, class: gpr32common }
body: |
; CHECK-LABEL: name: test01
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
; CHECK-NEXT: [[ANDSWri:%[0-9]+]]:gpr32common = ANDSWri killed [[ANDSWri]], 15, implicit-def $nzcv
; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.then:
; CHECK-NEXT: $w0 = MOVi32imm 1
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.end:
; CHECK-NEXT: $w0 = MOVi32imm 0
; CHECK-NEXT: RET_ReallyLR implicit $w0
bb.0.entry:
successors: %bb.2.if.end, %bb.1.if.then
%0 = MOVi32imm 1
%1 = ANDWri killed %1, 15
$wzr = SUBSWri killed %1, 0, 0, implicit-def $nzcv
Bcc 12, %bb.2.if.end, implicit $nzcv
bb.1.if.then:
$w0 = MOVi32imm 1
RET_ReallyLR implicit $w0
bb.2.if.end:
$w0 = MOVi32imm 0
RET_ReallyLR implicit $w0
...

View File

@ -161,6 +161,338 @@ define i1 @lt64_u16_and_23(i64 %0) {
ret i1 %3
}
; 32-bit `and` compared `eq 0`: the expected output folds the and + compare
; into a single `tst` followed by `cset eq`.
define i1 @test_disjoint(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w9, w8
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%5 = and i32 %3, %4
%6 = icmp eq i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit `and` compared `sgt 0` (signed): the expected output folds the
; and + compare into a single `tst` followed by `cset gt`.
define i1 @test_disjoint2(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w9, w8
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%5 = and i32 %3, %4
%6 = icmp sgt i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit `and` compared `slt 0` (signed): the expected output folds the
; and + compare into a single `tst` followed by `cset mi`.
define i1 @test_disjoint3(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: tst w9, w8
; CHECK-NEXT: cset w8, mi
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%5 = and i32 %3, %4
%6 = icmp slt i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit `and` compared `sle 0`: the expected output keeps a separate `and`
; followed by `cmp #1` / `cset lt`, i.e. no `tst` is formed here.
; NOTE(review): presumably `sle 0` is rewritten to `slt 1` before the peephole
; runs, leaving no compare against zero to fold -- confirm.
define i1 @test_disjoint4(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%5 = and i32 %3, %4
%6 = icmp sle i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit and-with-inverted-operand compared `sle 0`: the expected output keeps
; a separate `bic` followed by `cmp #1` / `cset lt`, i.e. no `bics` is formed.
; NOTE(review): presumably `sle 0` is rewritten to `slt 1` before the peephole
; runs, leaving no compare against zero to fold -- confirm.
define i1 @test_disjoint_inverse_4(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint_inverse_4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: bic w8, w9, w8
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%not = xor i32 %4, -1
%5 = and i32 %3, %not
%6 = icmp sle i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit and-with-inverted-operand compared `eq 0`: the expected output folds
; the operation and compare into a single `bics wzr, ...` followed by
; `cset eq`.
define i1 @test_disjoint_inverse(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint_inverse:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: bics wzr, w9, w8
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%not = xor i32 %4, -1
%5 = and i32 %3, %not
%6 = icmp eq i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit and-with-inverted-operand compared `sgt 0` (signed): the expected
; output folds the operation and compare into a single `bics wzr, ...`
; followed by `cset gt`.
define i1 @test_disjoint2_inverse(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint2_inverse:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: bics wzr, w9, w8
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%not = xor i32 %4, -1
%5 = and i32 %3, %not
%6 = icmp sgt i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 32-bit and-with-inverted-operand compared `slt 0` (signed): the expected
; output folds the operation and compare into a single `bics wzr, ...`
; followed by `cset mi`.
define i1 @test_disjoint3_inverse(i1 %0, i32 %1, i32 %2) {
; CHECK-LABEL: test_disjoint3_inverse:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr w9, w2, #0x800000
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: bics wzr, w9, w8
; CHECK-NEXT: cset w8, mi
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i32 %2, 8388608
%4 = shl nuw i32 1, %1
%not = xor i32 %4, -1
%5 = and i32 %3, %not
%6 = icmp slt i32 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit `and` compared `eq 0`: the expected output folds the and + compare
; into a single `tst` on x registers followed by `cset eq`.
define i1 @test_disjoint_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x9, x8
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%5 = and i64 %3, %4
%6 = icmp eq i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit `and` compared `sgt 0` (signed): the expected output folds the
; and + compare into a single `tst` on x registers followed by `cset gt`.
define i1 @test_disjoint2_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint2_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x9, x8
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%5 = and i64 %3, %4
%6 = icmp sgt i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit `and` compared `slt 0` (signed): the expected output folds the
; and + compare into a single `tst` on x registers followed by `cset mi`.
define i1 @test_disjoint3_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint3_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: tst x9, x8
; CHECK-NEXT: cset w8, mi
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%5 = and i64 %3, %4
%6 = icmp slt i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit `and` compared `sle 0`: the expected output keeps a separate `and`
; followed by `cmp #1` / `cset lt`, i.e. no `tst` is formed here.
; NOTE(review): presumably `sle 0` is rewritten to `slt 1` before the peephole
; runs, leaving no compare against zero to fold -- confirm.
define i1 @test_disjoint4_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint4_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: and x8, x9, x8
; CHECK-NEXT: cmp x8, #1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%5 = and i64 %3, %4
%6 = icmp sle i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit and-with-inverted-operand compared `sle 0`: the expected output keeps
; a separate `bic` followed by `cmp #1` / `cset lt`, i.e. no `bics` is formed.
; NOTE(review): presumably `sle 0` is rewritten to `slt 1` before the peephole
; runs, leaving no compare against zero to fold -- confirm.
define i1 @test_disjoint_inverse_4_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint_inverse_4_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: bic x8, x9, x8
; CHECK-NEXT: cmp x8, #1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%not = xor i64 %4, -1
%5 = and i64 %3, %not
%6 = icmp sle i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit and-with-inverted-operand compared `eq 0`: the expected output folds
; the operation and compare into a single `bics xzr, ...` followed by
; `cset eq`.
define i1 @test_disjoint_inverse_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint_inverse_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: bics xzr, x9, x8
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%not = xor i64 %4, -1
%5 = and i64 %3, %not
%6 = icmp eq i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit and-with-inverted-operand compared `sgt 0` (signed): the expected
; output folds the operation and compare into a single `bics xzr, ...`
; followed by `cset gt`.
define i1 @test_disjoint2_inverse_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint2_inverse_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: bics xzr, x9, x8
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%not = xor i64 %4, -1
%5 = and i64 %3, %not
%6 = icmp sgt i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; 64-bit and-with-inverted-operand compared `slt 0` (signed): the expected
; output folds the operation and compare into a single `bics xzr, ...`
; followed by `cset mi`.
define i1 @test_disjoint3_inverse_64(i1 %0, i64 %1, i64 %2) {
; CHECK-LABEL: test_disjoint3_inverse_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: orr x9, x2, #0x80000000000000
; CHECK-NEXT: lsl x8, x8, x1
; CHECK-NEXT: bics xzr, x9, x8
; CHECK-NEXT: cset w8, mi
; CHECK-NEXT: orr w8, w0, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
entry:
%3 = or disjoint i64 %2, 36028797018963968
%4 = shl nuw i64 1, %1
%not = xor i64 %4, -1
%5 = and i64 %3, %not
%6 = icmp slt i64 %5, 0
%7 = select i1 %0, i1 true, i1 %6
ret i1 %7
}
; negative test
define i1 @lt3_u8(i8 %0) {
; CHECK-LABEL: lt3_u8: