llvm-project/llvm/test/CodeGen/X86/fcmp-logic.ll
Phoebe Wang 76e14deb4a
[X86][BreakFalseDeps] Using reverse order for undef register selection (#137569)
BreakFalseDeps picks the best register for undef operands if
instructions have a false dependency. The problem is that if the
instruction is close to the beginning of the function,
ReachingDefAnalysis is over-optimistic about the unused registers,
which results in collisions with registers just defined in the caller.

This patch changes the selection of the undef register to a reverse
order, which reduces the probability of register collisions between
caller and callee. It brings improvements in some of our internal
benchmarks with negligible effect on the others.
2025-06-11 22:08:20 +08:00

436 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: olt_ole_and_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpless %xmm3, %xmm2
; SSE2-NEXT: cmpltss %xmm1, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: olt_ole_and_f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpless %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: olt_ole_and_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpless %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1
; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp olt float %w, %x
%f2 = fcmp ole float %y, %z
%r = and i1 %f1, %f2
ret i1 %r
}
define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: oge_oeq_or_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpeqss %xmm3, %xmm2
; SSE2-NEXT: cmpless %xmm0, %xmm1
; SSE2-NEXT: orps %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: oge_oeq_or_f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpless %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: oge_oeq_or_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp oge float %w, %x
%f2 = fcmp oeq float %y, %z
%r = or i1 %f1, %f2
ret i1 %r
}
define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: ord_one_xor_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: ucomiss %xmm1, %xmm0
; SSE2-NEXT: setnp %cl
; SSE2-NEXT: ucomiss %xmm3, %xmm2
; SSE2-NEXT: setne %al
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: ord_one_xor_f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneq_oqss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: ord_one_xor_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneq_oqss %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp ord float %w, %x
%f2 = fcmp one float %y, %z
%r = xor i1 %f1, %f2
ret i1 %r
}
; PR51068
define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: une_oeq_xor_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpeqss %xmm3, %xmm2
; SSE2-NEXT: cmpneqss %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: une_oeq_xor_f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: une_oeq_xor_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpneqss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp une float %w, %x
%f2 = fcmp oeq float %y, %z
%r = xor i1 %f1, %f2
ret i1 %r
}
define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_ugt_and_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpnlesd %xmm3, %xmm2
; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: une_ugt_and_f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlesd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: une_ugt_and_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlesd %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp ugt double %y, %z
%r = and i1 %f1, %f2
ret i1 %r
}
define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: ult_uge_or_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpnltsd %xmm3, %xmm2
; SSE2-NEXT: cmpnlesd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: ult_uge_or_f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: ult_uge_or_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltsd %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp ult double %w, %x
%f2 = fcmp uge double %y, %z
%r = or i1 %f1, %f2
ret i1 %r
}
define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_uno_xor_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpunordsd %xmm3, %xmm2
; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: xorpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: une_uno_xor_f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: une_uno_xor_f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordsd %xmm3, %xmm2, %k0
; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp uno double %y, %z
%r = xor i1 %f1, %f2
ret i1 %r
}
; This uses ucomis because the types do not match.
; TODO: Merge down to narrow type?
define i1 @olt_olt_and_f32_f64(float %w, float %x, double %y, double %z) {
; SSE2-LABEL: olt_olt_and_f32_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: ucomiss %xmm0, %xmm1
; SSE2-NEXT: seta %cl
; SSE2-NEXT: ucomisd %xmm2, %xmm3
; SSE2-NEXT: seta %al
; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: olt_olt_and_f32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vucomiss %xmm0, %xmm1
; AVX-NEXT: seta %cl
; AVX-NEXT: vucomisd %xmm2, %xmm3
; AVX-NEXT: seta %al
; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
%f1 = fcmp olt float %w, %x
%f2 = fcmp olt double %y, %z
%r = and i1 %f1, %f2
ret i1 %r
}
; This uses ucomis because of extra uses.
define i1 @une_uno_xor_f64_use1(double %w, double %x, double %y, double %z, ptr %p) {
; SSE2-LABEL: une_uno_xor_f64_use1:
; SSE2: # %bb.0:
; SSE2-NEXT: ucomisd %xmm1, %xmm0
; SSE2-NEXT: setp %al
; SSE2-NEXT: setne %cl
; SSE2-NEXT: orb %al, %cl
; SSE2-NEXT: movb %cl, (%rdi)
; SSE2-NEXT: ucomisd %xmm3, %xmm2
; SSE2-NEXT: setp %al
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: une_uno_xor_f64_use1:
; AVX: # %bb.0:
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: setp %al
; AVX-NEXT: setne %cl
; AVX-NEXT: orb %al, %cl
; AVX-NEXT: movb %cl, (%rdi)
; AVX-NEXT: vucomisd %xmm3, %xmm2
; AVX-NEXT: setp %al
; AVX-NEXT: xorb %cl, %al
; AVX-NEXT: retq
%f1 = fcmp une double %w, %x
store i1 %f1, ptr %p
%f2 = fcmp uno double %y, %z
%r = xor i1 %f1, %f2
ret i1 %r
}
; This uses ucomis because of extra uses.
define i1 @une_uno_xor_f64_use2(double %w, double %x, double %y, double %z, ptr %p) {
; SSE2-LABEL: une_uno_xor_f64_use2:
; SSE2: # %bb.0:
; SSE2-NEXT: ucomisd %xmm1, %xmm0
; SSE2-NEXT: setp %al
; SSE2-NEXT: setne %cl
; SSE2-NEXT: orb %al, %cl
; SSE2-NEXT: ucomisd %xmm3, %xmm2
; SSE2-NEXT: setp %al
; SSE2-NEXT: setp (%rdi)
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: une_uno_xor_f64_use2:
; AVX: # %bb.0:
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: setp %al
; AVX-NEXT: setne %cl
; AVX-NEXT: orb %al, %cl
; AVX-NEXT: vucomisd %xmm3, %xmm2
; AVX-NEXT: setp %al
; AVX-NEXT: setp (%rdi)
; AVX-NEXT: xorb %cl, %al
; AVX-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp uno double %y, %z
store i1 %f2, ptr %p
%r = xor i1 %f1, %f2
ret i1 %r
}
; bool f32cmp3(float x, float y, float z, float w) {
; return ((x > 0) || (y > 0)) != (z < w);
; }
define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; SSE2-LABEL: f32cmp3:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: xorps %xmm5, %xmm5
; SSE2-NEXT: cmpltss %xmm1, %xmm5
; SSE2-NEXT: cmpltss %xmm0, %xmm4
; SSE2-NEXT: orps %xmm5, %xmm4
; SSE2-NEXT: movd %xmm4, %ecx
; SSE2-NEXT: ucomiss %xmm2, %xmm3
; SSE2-NEXT: seta %al
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: f32cmp3:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vcmpltss %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vcmpltss %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %ecx
; AVX1-NEXT: vucomiss %xmm2, %xmm3
; AVX1-NEXT: seta %al
; AVX1-NEXT: xorb %cl, %al
; AVX1-NEXT: retq
;
; AVX512-LABEL: f32cmp3:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX512-NEXT: vcmpltss %xmm1, %xmm4, %k0
; AVX512-NEXT: vcmpltss %xmm0, %xmm4, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %ecx
; AVX512-NEXT: vucomiss %xmm2, %xmm3
; AVX512-NEXT: seta %al
; AVX512-NEXT: xorb %cl, %al
; AVX512-NEXT: retq
%cmpx = fcmp ogt float %x, 0.0
%cmpy = fcmp ogt float %y, 0.0
%or = select i1 %cmpx, i1 true, i1 %cmpy
%cmpzw = fcmp olt float %z, %w
%r = xor i1 %or, %cmpzw
ret i1 %r
}
define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; SSE2-LABEL: PR140534:
; SSE2: # %bb.0:
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: movl %esi, %eax
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: cmpltsd %xmm2, %xmm3
; SSE2-NEXT: cmpltsd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm3, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: PR140534:
; AVX1: # %bb.0:
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: movl %edx, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcmpltsd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: retq
;
; AVX512-LABEL: PR140534:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: vcvtusi2sd %esi, %xmm15, %xmm1
; AVX512-NEXT: vcvtusi2sd %edx, %xmm15, %xmm2
; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vcmpltsd %xmm2, %xmm1, %k0
; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%conv0 = uitofp i32 %a0 to double
%conv1 = uitofp i32 %a1 to double
%conv2 = uitofp i32 %a2 to double
%mul = fmul double %conv1, 0x3FF6A09E667F3BCD
%cmp0 = fcmp olt double %mul, %conv0
%cmp2 = fcmp olt double %mul, %conv2
%or = or i1 %cmp0, %cmp2
ret i1 %or
}