Craig Topper a4373f6753 [X86] Don't combine (x86cmp (trunc (movmsk (bitcast X))), 0) if the truncate discards unknown bits.
We have a transform that tries to turn a pmovmskb into movmskps/pd or
a movmskps into a movmskpd. This transform isn't valid if the truncate
discarded bits that might be set by the original movmsk.

We could fix this by inserting an AND after the new movmsk to discard
the equivalent of the truncated bits, but I've left that for a later
patch.

Fixes PR52567.

Differential Revision: https://reviews.llvm.org/D114306
2021-11-19 21:50:35 -08:00

28 lines
1.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; The 'and' in the test below discards half the bits of the vector icmp result.
; We use a testb after a pmovmskb to examine only the remaining 8 bits.
; Regression test for PR52567.
;
; @foo compares each lane of %arg against a splat of ~1.0e-3, builds a 16-bit
; mask holding the sign bit of every byte of the sign-extended compare result,
; keeps only the low 8 bits of that mask, and returns 1 if those bits are all
; zero (0 otherwise).
;
; The expected output keeps the byte-granular pmovmskb followed by an 8-bit
; testb. Rewriting the pmovmskb as a movmskps/movmskpd is not valid here,
; because the masked-off bits may be set by the original movmsk.
define i32 @foo(<4 x float> %arg) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.00000005E-3,1.00000005E-3,1.00000005E-3,1.00000005E-3]
; CHECK-NEXT: cmpltps %xmm0, %xmm1
; CHECK-NEXT: pmovmskb %xmm1, %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
bb:
; Per-lane ordered "greater than" against the splat constant (~1.0e-3).
%tmp = fcmp ogt <4 x float> %arg, <float 0x3F50624DE0000000, float 0x3F50624DE0000000, float 0x3F50624DE0000000, float 0x3F50624DE0000000>
; Widen each i1 result to an i32 lane: all-ones when true, zero when false.
%tmp1 = sext <4 x i1> %tmp to <4 x i32>
; Reinterpret the four i32 lanes as sixteen bytes.
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
; A byte is "less than zero" exactly when its sign bit is set.
%tmp3 = icmp slt <16 x i8> %tmp2, zeroinitializer
; Pack the sixteen per-byte results into a 16-bit mask, one bit per byte.
%tmp4 = bitcast <16 x i1> %tmp3 to i16
; Keep only the low 8 mask bits; the upper half of the mask is discarded.
; On this little-endian target those bits cover bytes 0-7, i.e. float lanes
; 0 and 1.
%tmp5 = and i16 %tmp4, 255
; True when none of the retained mask bits are set.
%tmp6 = icmp eq i16 %tmp5, 0
; Return the result as an i32: 1 or 0.
%tmp7 = zext i1 %tmp6 to i32
ret i32 %tmp7
}