llvm-project/llvm/test/CodeGen/AMDGPU/merged-bfx-opt.ll
Pierre van Houtryve c4b1557097
[DAG] Fold (setcc ((x | x >> c0 | ...) & mask)) sequences (#146054)
Fold sequences where we extract a bunch of contiguous bits from a value,
merge them into the low bit and then check if the low bits are zero or
not.

Usually the and would be on the outside (the leaves) of the expression,
but the DAG canonicalizes it to a single `and` at the root of the
expression.

The reason I put this in DAGCombiner instead of the target combiner is
because this is a generic, valid transform that's also fairly niche, so
there isn't much risk of a combine loop I think.

See #136727
2025-07-30 10:27:19 +02:00

124 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=amdgcn -mcpu=fiji %s -o - | FileCheck %s
; Three contiguous 5-bit fields of %arg (bits 0-4, 5-9, 10-14) are extracted,
; OR'd together, and compared eq to zero. The combine introduced in #146054
; folds this to a single AND with the merged mask 0x7fff (= 0x1f | 0x1f<<5 |
; 0x1f<<10) followed by one compare, as the CHECK lines below verify.
define i1 @basic_eq_i16_3x5(i16 %arg) {
; CHECK-LABEL: basic_eq_i16_3x5:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; field a = bits 0-4
  %a = and i16 %arg, 31
  %sh5 = lshr i16 %arg, 5
  ; field b = bits 5-9
  %b = and i16 %sh5, 31
  %or = or i16 %a, %b
  %sh10 = lshr i16 %arg, 10
  ; field c = bits 10-14
  %c = and i16 %sh10, 31
  %or1 = or i16 %or, %c
  ; eq-to-zero of the merged fields; equivalent to (%arg & 0x7fff) == 0
  %cmp = icmp eq i16 %or1, 0
  ret i1 %cmp
}
; Same pattern as basic_eq_i16_3x5 but at i32: three contiguous 5-bit fields
; (bits 0-4, 5-9, 10-14) merged with OR and compared eq to zero. Expected to
; fold to a single AND with 0x7fff plus one 32-bit compare.
define i1 @basic_eq_i32_3x5(i32 %arg) {
; CHECK-LABEL: basic_eq_i32_3x5:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; field a = bits 0-4
  %a = and i32 %arg, 31
  %sh5 = lshr i32 %arg, 5
  ; field b = bits 5-9
  %b = and i32 %sh5, 31
  %or = or i32 %a, %b
  %sh10 = lshr i32 %arg, 10
  ; field c = bits 10-14
  %c = and i32 %sh10, 31
  %or1 = or i32 %or, %c
  ; eq-to-zero of the merged fields; equivalent to (%arg & 0x7fff) == 0
  %cmp = icmp eq i32 %or1, 0
  ret i1 %cmp
}
; i64 variant of the 3x5-bit merge-and-test pattern. Note the CHECK lines show
; the whole check collapsing to a single 32-bit AND/compare on v0: the merged
; mask 0x7fff only covers the low 15 bits, so the high half of the i64 is
; irrelevant and the backend operates on one 32-bit register.
define i1 @basic_eq_i64_3x5(i64 %arg) {
; CHECK-LABEL: basic_eq_i64_3x5:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; field a = bits 0-4
  %a = and i64 %arg, 31
  %sh5 = lshr i64 %arg, 5
  ; field b = bits 5-9
  %b = and i64 %sh5, 31
  %or = or i64 %a, %b
  %sh10 = lshr i64 %arg, 10
  ; field c = bits 10-14
  %c = and i64 %sh10, 31
  %or1 = or i64 %or, %c
  ; eq-to-zero of the merged fields; equivalent to (%arg & 0x7fff) == 0
  %cmp = icmp eq i64 %or1, 0
  ret i1 %cmp
}
; Inverted-predicate variant of basic_eq_i32_3x5: identical field extraction
; (bits 0-4, 5-9, 10-14) but the final compare is icmp ne. The fold must apply
; to both eq and ne against zero, producing v_cmp_ne instead of v_cmp_eq.
define i1 @basic_ne_i32_3x5(i32 %arg) {
; CHECK-LABEL: basic_ne_i32_3x5:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; field a = bits 0-4
  %a = and i32 %arg, 31
  %sh5 = lshr i32 %arg, 5
  ; field b = bits 5-9
  %b = and i32 %sh5, 31
  %or = or i32 %a, %b
  %sh10 = lshr i32 %arg, 10
  ; field c = bits 10-14
  %c = and i32 %sh10, 31
  %or1 = or i32 %or, %c
  ; ne-to-zero of the merged fields; equivalent to (%arg & 0x7fff) != 0
  %cmp = icmp ne i32 %or1, 0
  ret i1 %cmp
}
; Non-contiguous fields: bits 0-4, 7-11, 10-14 (the shift amounts are 0, 7,
; 10, so the second and third fields overlap and bits 5-6 are skipped). The
; fold must still merge them into one AND with the union mask
; 0x1f | (0x1f << 7) | (0x1f << 10) = 0x7f9f, as checked below.
; NOTE(review): the function name says "eq" but the IR uses icmp ne; renaming
; would require regenerating the CHECK-LABEL, so it is only flagged here.
define i1 @eq_i32_3x5_holes_in_mask(i32 %arg) {
; CHECK-LABEL: eq_i32_3x5_holes_in_mask:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7f9f, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; field a = bits 0-4
  %a = and i32 %arg, 31
  ; shift by 7 (not 5) leaves a hole at bits 5-6
  %sh5 = lshr i32 %arg, 7
  ; field b = bits 7-11
  %b = and i32 %sh5, 31
  %or = or i32 %a, %b
  %sh10 = lshr i32 %arg, 10
  ; field c = bits 10-14 (overlaps field b at bits 10-11)
  %c = and i32 %sh10, 31
  %or1 = or i32 %or, %c
  ; ne-to-zero of the merged fields; equivalent to (%arg & 0x7f9f) != 0
  %cmp = icmp ne i32 %or1, 0
  ret i1 %cmp
}
; Every field is shifted (no unshifted low field): bits 2-6, 7-11, 10-14. The
; fold must merge them into one AND with the union mask
; (0x1f << 2) | (0x1f << 7) | (0x1f << 10) = 0x7ffc, as checked below.
; NOTE(review): as with eq_i32_3x5_holes_in_mask, the name says "eq" but the
; IR uses icmp ne; renaming would require regenerating the CHECK-LABEL.
define i1 @eq_i32_3x5_all_shifted(i32 %arg) {
; CHECK-LABEL: eq_i32_3x5_all_shifted:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffc, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  %sh2 = lshr i32 %arg, 2
  ; field a = bits 2-6
  %a = and i32 %sh2, 31
  %sh5 = lshr i32 %arg, 7
  ; field b = bits 7-11 (overlaps field a at bit ranges? no: a ends at 6)
  %b = and i32 %sh5, 31
  %or = or i32 %a, %b
  %sh10 = lshr i32 %arg, 10
  ; field c = bits 10-14 (overlaps field b at bits 10-11)
  %c = and i32 %sh10, 31
  %or1 = or i32 %or, %c
  ; ne-to-zero of the merged fields; equivalent to (%arg & 0x7ffc) != 0
  %cmp = icmp ne i32 %or1, 0
  ret i1 %cmp
}