commit bbcebec3af
[DAG] Refactor X86 combineVSelectWithAllOnesOrZeros fold into a generic DAG Combine (#145298)
This PR resolves https://github.com/llvm/llvm-project/issues/144513

The modification includes five patterns:
1.vselect Cond, 0, 0 → 0
2.vselect Cond, -1, 0 → bitcast Cond
3.vselect Cond, -1, x → or Cond, x
4.vselect Cond, x, 0 → and Cond, x
5.vselect Cond, 000..., X -> andn Cond, X

Patterns 1-4 have been migrated to DAGCombine; pattern 5 is still in the x86 code.

The reason is that you cannot use the andn instruction directly in
DAGCombine, you can only use and+xor, which would introduce optimization
order issues. For example, in the x86 backend, select Cond, 0, x →
(~Cond) & x: the backend will first check whether the cond node of
(~Cond) is a setcc node. If so, it will modify the comparison operator
of the condition. So the x86 backend cannot complete the optimization of
andn. In short, I think it is a better choice to keep the pattern of
vselect Cond, 000..., X instead of and+xor in DAGCombine.

For the commits, the first contains the code changes and x86 tests (note 1), the second
contains the tests for the other backends (note 2).

---------

Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
2025-07-02 15:07:48 +01:00

149 lines
4.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Scalar select of constant 4 vs 0 keyed on an ordered greater-than float
; compare. SDAG lowers to fcmp + csel against xzr; GlobalISel materializes
; the i1 with cset and shifts it left by 2 (4 == 1 << 2).
define i64 @select_ogt_float(float %a, float %b) {
; CHECK-SD-LABEL: select_ogt_float:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmp s0, s1
; CHECK-SD-NEXT: mov w8, #4 // =0x4
; CHECK-SD-NEXT: csel x0, x8, xzr, gt
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_ogt_float:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmp s0, s1
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: lsl x0, x8, #2
; CHECK-GI-NEXT: ret
entry:
  %cc = fcmp ogt float %a, %b
  %sel = select i1 %cc, i64 4, i64 0
  ret i64 %sel
}
; Inverse form of select_ogt_float: the select arms are swapped and the
; predicate inverted (ule), so the result is identical. SDAG keeps the
; inverted condition in the csel (le, operands swapped); GlobalISel
; canonicalizes back to cset gt + shift, matching the non-inverse test.
define i64 @select_ule_float_inverse(float %a, float %b) {
; CHECK-SD-LABEL: select_ule_float_inverse:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmp s0, s1
; CHECK-SD-NEXT: mov w8, #4 // =0x4
; CHECK-SD-NEXT: csel x0, xzr, x8, le
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_ule_float_inverse:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmp s0, s1
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: lsl x0, x8, #2
; CHECK-GI-NEXT: ret
entry:
  %cc = fcmp ule float %a, %b
  %sel = select i1 %cc, i64 0, i64 4
  ret i64 %sel
}
; Same 4-vs-0 select pattern as above, but keyed on an integer equality
; compare. SDAG uses cmp + csel against xzr; GlobalISel uses cset eq and
; a left shift by 2.
define i64 @select_eq_i32(i32 %a, i32 %b) {
; CHECK-SD-LABEL: select_eq_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #4 // =0x4
; CHECK-SD-NEXT: cmp w0, w1
; CHECK-SD-NEXT: csel x0, x8, xzr, eq
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_eq_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmp w0, w1
; CHECK-GI-NEXT: cset w8, eq
; CHECK-GI-NEXT: lsl x0, x8, #2
; CHECK-GI-NEXT: ret
entry:
  %cc = icmp eq i32 %a, %b
  %sel = select i1 %cc, i64 4, i64 0
  ret i64 %sel
}
; Inverse form of select_eq_i32: swapped select arms with an ne predicate,
; computing the same value. SDAG emits csel with swapped operands on ne;
; GlobalISel again canonicalizes to cset eq + shift.
define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
; CHECK-SD-LABEL: select_ne_i32_inverse:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #4 // =0x4
; CHECK-SD-NEXT: cmp w0, w1
; CHECK-SD-NEXT: csel x0, xzr, x8, ne
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_ne_i32_inverse:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmp w0, w1
; CHECK-GI-NEXT: cset w8, eq
; CHECK-GI-NEXT: lsl x0, x8, #2
; CHECK-GI-NEXT: ret
entry:
  %cc = icmp ne i32 %a, %b
  %sel = select i1 %cc, i64 0, i64 4
  ret i64 %sel
}
; Vector form of the "vselect Cond, x, 0 -> and Cond, x" fold: selecting
; %a vs zeroinitializer on a loaded float compare. SDAG collapses the
; select to a single AND with the sign-extended compare mask; GlobalISel
; still materializes an explicit zero vector and uses bif.
define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) {
; CHECK-SD-LABEL: select_olt_load_cmp:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: fcmgt v1.2s, v1.2s, #0.0
; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_olt_load_cmp:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
  %l = load <2 x float>, ptr %src, align 4
  %cmp = fcmp olt <2 x float> zeroinitializer, %l
  %sel = select <2 x i1> %cmp, <2 x double> %a, <2 x double> zeroinitializer
  ret <2 x double> %sel
}
; Vector form of the "vselect Cond, 0, x" pattern (pattern 5 in the commit
; message): selecting zeroinitializer vs %a on an i8 compare. Both
; selectors fold the select into bic (AND-NOT) with the compare mask; the
; narrow <4 x i8> operand forces extra extension/shuffle code first.
define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) {
; CHECK-SD-LABEL: select_icmp_sgt:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #8
; CHECK-SD-NEXT: cmgt v1.4h, v1.4h, #0
; CHECK-SD-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: select_icmp_sgt:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #0 // =0x0
; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: mov v2.b[1], w8
; CHECK-GI-NEXT: mov v2.b[2], w8
; CHECK-GI-NEXT: mov v2.b[3], w8
; CHECK-GI-NEXT: cmgt v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: umov w8, v1.b[0]
; CHECK-GI-NEXT: umov w9, v1.b[1]
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: umov w8, v1.b[2]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: umov w9, v1.b[3]
; CHECK-GI-NEXT: mov v2.s[2], w8
; CHECK-GI-NEXT: mov v2.s[3], w9
; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
  %cmp = icmp sgt <4 x i8> %b, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %a
  ret <4 x i32> %sel
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}