
This PR resolves https://github.com/llvm/llvm-project/issues/144513. The modifications cover five patterns: 1. vselect Cond, 0, 0 → 0; 2. vselect Cond, -1, 0 → bitcast Cond; 3. vselect Cond, -1, x → or Cond, x; 4. vselect Cond, x, 0 → and Cond, x; 5. vselect Cond, 000..., X → andn Cond, X. Patterns 1-4 have been migrated to DAGCombine; pattern 5 remains in the x86 code. The reason is that the andn instruction cannot be used directly in DAGCombine; only and+xor can be used there, which introduces optimization-order issues. For example, in the x86 backend, for select Cond, 0, x → (~Cond) & x, the backend first checks whether the cond node of (~Cond) is a setcc node. If so, it modifies the comparison operator of the condition, so the x86 backend cannot complete the andn optimization. In short, I think it is better to keep the vselect Cond, 000..., X pattern in the x86 backend rather than emit and+xor in DAGCombine. As for the commits: the first contains the code changes and the x86 tests (note 1), and the second contains the tests for the other backends (note 2). --------- Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
149 lines
4.7 KiB
LLVM
149 lines
4.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
|
|
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
|
|
|
|
; select_ogt_float: scalar select of an i64 constant on a float compare.
; The IR compares %a and %b with `fcmp ogt` and yields 4 when the compare is
; true and 0 otherwise.
; Expected code for the default run (no -global-isel): fcmp, materialise 4 in
; w8, then `csel x0, x8, xzr, gt`.
; Expected code for the -global-isel run: fcmp, `cset w8, gt`, then
; `lsl x0, x8, #2` (the 0/1 flag shifted left by two gives 0/4).
define i64 @select_ogt_float(float %a, float %b) {
|
|
; CHECK-SD-LABEL: select_ogt_float:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: fcmp s0, s1
|
|
; CHECK-SD-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-SD-NEXT: csel x0, x8, xzr, gt
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_ogt_float:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: fcmp s0, s1
|
|
; CHECK-GI-NEXT: cset w8, gt
|
|
; CHECK-GI-NEXT: lsl x0, x8, #2
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%cc = fcmp ogt float %a, %b
|
|
%sel = select i1 %cc, i64 4, i64 0
|
|
ret i64 %sel
|
|
}
|
|
|
|
; select_ule_float_inverse: scalar select with the zero constant in the
; "true" arm. The IR compares %a and %b with `fcmp ule` and yields 0 when the
; compare is true and 4 otherwise.
; Expected code for the default run (no -global-isel): fcmp, materialise 4 in
; w8, then `csel x0, xzr, x8, le`.
; Expected code for the -global-isel run: fcmp, `cset w8, gt` (the inverted
; condition), then `lsl x0, x8, #2`.
define i64 @select_ule_float_inverse(float %a, float %b) {
|
|
; CHECK-SD-LABEL: select_ule_float_inverse:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: fcmp s0, s1
|
|
; CHECK-SD-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-SD-NEXT: csel x0, xzr, x8, le
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_ule_float_inverse:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: fcmp s0, s1
|
|
; CHECK-GI-NEXT: cset w8, gt
|
|
; CHECK-GI-NEXT: lsl x0, x8, #2
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%cc = fcmp ule float %a, %b
|
|
%sel = select i1 %cc, i64 0, i64 4
|
|
ret i64 %sel
|
|
}
|
|
|
|
; select_eq_i32: scalar select of an i64 constant on an integer compare.
; The IR compares %a and %b with `icmp eq` and yields 4 when they are equal
; and 0 otherwise.
; Expected code for the default run (no -global-isel): materialise 4 in w8,
; cmp, then `csel x0, x8, xzr, eq`.
; Expected code for the -global-isel run: cmp, `cset w8, eq`, then
; `lsl x0, x8, #2`.
define i64 @select_eq_i32(i32 %a, i32 %b) {
|
|
; CHECK-SD-LABEL: select_eq_i32:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-SD-NEXT: cmp w0, w1
|
|
; CHECK-SD-NEXT: csel x0, x8, xzr, eq
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_eq_i32:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: cmp w0, w1
|
|
; CHECK-GI-NEXT: cset w8, eq
|
|
; CHECK-GI-NEXT: lsl x0, x8, #2
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%cc = icmp eq i32 %a, %b
|
|
%sel = select i1 %cc, i64 4, i64 0
|
|
ret i64 %sel
|
|
}
|
|
|
|
; select_ne_i32_inverse: integer-compare select with the zero constant in the
; "true" arm. The IR compares %a and %b with `icmp ne` and yields 0 when they
; differ and 4 otherwise.
; Expected code for the default run (no -global-isel): materialise 4 in w8,
; cmp, then `csel x0, xzr, x8, ne`.
; Expected code for the -global-isel run: cmp, `cset w8, eq` (the inverted
; condition), then `lsl x0, x8, #2`.
define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
|
|
; CHECK-SD-LABEL: select_ne_i32_inverse:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: mov w8, #4 // =0x4
|
|
; CHECK-SD-NEXT: cmp w0, w1
|
|
; CHECK-SD-NEXT: csel x0, xzr, x8, ne
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_ne_i32_inverse:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: cmp w0, w1
|
|
; CHECK-GI-NEXT: cset w8, eq
|
|
; CHECK-GI-NEXT: lsl x0, x8, #2
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%cc = icmp ne i32 %a, %b
|
|
%sel = select i1 %cc, i64 0, i64 4
|
|
ret i64 %sel
|
|
}
|
|
|
|
; select_olt_load_cmp: vector select between %a and zero, where the condition
; is narrower than the selected value. The IR loads a <2 x float> from %src,
; compares `zeroinitializer olt %l` per lane, and selects the matching
; <2 x double> lane of %a when true and zero otherwise.
; Expected code for the default run (no -global-isel): ldr, `fcmgt ... #0.0`,
; `sshll` to widen the compare result from .2s to .2d, then a single `and`
; with v0.
; Expected code for the -global-isel run: ldr, a `movi` zero vector, fcmgt,
; `ushll` to .2d, `shl`/`sshr` by 63, then `bif` between v0 and the zero
; vector.
define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) {
|
|
; CHECK-SD-LABEL: select_olt_load_cmp:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: ldr d1, [x0]
|
|
; CHECK-SD-NEXT: fcmgt v1.2s, v1.2s, #0.0
|
|
; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
|
|
; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_olt_load_cmp:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: ldr d1, [x0]
|
|
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
|
|
; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0
|
|
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
|
|
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
|
|
; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
|
|
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%l = load <2 x float>, ptr %src, align 4
|
|
%cmp = fcmp olt <2 x float> zeroinitializer, %l
|
|
%sel = select <2 x i1> %cmp, <2 x double> %a, <2 x double> zeroinitializer
|
|
ret <2 x double> %sel
|
|
}
|
|
|
|
; select_icmp_sgt: vector select with the zero vector in the "true" arm.
; The IR compares each <4 x i8> lane of %b with `icmp sgt ..., 0` and yields
; zero for lanes where the compare is true and the matching <4 x i32> lane of
; %a otherwise.
; Expected code for the default run (no -global-isel): `shl`/`sshr` by 8 on
; .4h lanes, `cmgt ... #0`, `sshll` to .4s, then a single
; `bic v0.16b, v0.16b, v1.16b`.
; Expected code for the -global-isel run: a longer sequence that builds a zero
; vector lane by lane, compares with `cmgt` on .8b, moves the four result
; bytes into .s lanes through w8/w9, applies `shl`/`sshr` by 31, and ends with
; the same `bic`.
define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) {
|
|
; CHECK-SD-LABEL: select_icmp_sgt:
|
|
; CHECK-SD: // %bb.0: // %entry
|
|
; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
|
|
; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #8
|
|
; CHECK-SD-NEXT: cmgt v1.4h, v1.4h, #0
|
|
; CHECK-SD-NEXT: sshll v1.4s, v1.4h, #0
|
|
; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
|
|
; CHECK-SD-NEXT: ret
|
|
;
|
|
; CHECK-GI-LABEL: select_icmp_sgt:
|
|
; CHECK-GI: // %bb.0: // %entry
|
|
; CHECK-GI-NEXT: mov w8, #0 // =0x0
|
|
; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
|
|
; CHECK-GI-NEXT: fmov s2, w8
|
|
; CHECK-GI-NEXT: mov v2.b[1], w8
|
|
; CHECK-GI-NEXT: mov v2.b[2], w8
|
|
; CHECK-GI-NEXT: mov v2.b[3], w8
|
|
; CHECK-GI-NEXT: cmgt v1.8b, v1.8b, v2.8b
|
|
; CHECK-GI-NEXT: umov w8, v1.b[0]
|
|
; CHECK-GI-NEXT: umov w9, v1.b[1]
|
|
; CHECK-GI-NEXT: fmov s2, w8
|
|
; CHECK-GI-NEXT: umov w8, v1.b[2]
|
|
; CHECK-GI-NEXT: mov v2.s[1], w9
|
|
; CHECK-GI-NEXT: umov w9, v1.b[3]
|
|
; CHECK-GI-NEXT: mov v2.s[2], w8
|
|
; CHECK-GI-NEXT: mov v2.s[3], w9
|
|
; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31
|
|
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
|
|
; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b
|
|
; CHECK-GI-NEXT: ret
|
|
entry:
|
|
%cmp = icmp sgt <4 x i8> %b, zeroinitializer
|
|
%sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %a
|
|
ret <4 x i32> %sel
|
|
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; CHECK: {{.*}}
|