
We have a transform that tries to turn a pmovmskb into movmskps/pd, or a movmskps into movmskpd. This transform isn't valid if the truncate discarded bits that might be set by the original movmsk. We could fix this by inserting an AND after the new movmsk to discard the equivalent of the truncated bits, but I've left that for a later patch. Fixes PR52567. Differential Revision: https://reviews.llvm.org/D114306
28 lines
1.1 KiB
LLVM
28 lines
1.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
|
|
|
|
; The 'and' in the test below discards half the bits from the vector icmp result.
|
|
; We use a testb after a pmovmskb to examine only 8 bits.
|
|
|
|
; @foo returns 1 when the low 8 bits of a 16-bit per-byte sign mask are all
; clear, and 0 otherwise. The mask is built by comparing %arg lane-wise against
; a splat constant, sign-extending each i1 result to an i32 lane, viewing the
; 128-bit value as sixteen bytes, and collecting one "byte is negative" bit per
; byte. The assertions that follow expect the mask to come from pmovmskb and
; only its low byte to be examined (testb %cl, %cl).
define i32 @foo(<4 x float> %arg) {
|
|
; CHECK-LABEL: foo:
|
|
; CHECK: # %bb.0: # %bb
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.00000005E-3,1.00000005E-3,1.00000005E-3,1.00000005E-3]
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm1
|
|
; CHECK-NEXT: pmovmskb %xmm1, %ecx
|
|
; CHECK-NEXT: xorl %eax, %eax
|
|
; CHECK-NEXT: testb %cl, %cl
|
|
; CHECK-NEXT: sete %al
|
|
; CHECK-NEXT: retq
|
|
bb:
|
|
; Lane-wise ordered greater-than against a splat constant (the same constant
; is printed as 1.00000005E-3 in the expected movaps above).
%tmp = fcmp ogt <4 x float> %arg, <float 0x3F50624DE0000000, float 0x3F50624DE0000000, float 0x3F50624DE0000000, float 0x3F50624DE0000000>
|
|
; Widen each i1 result to a full i32 lane: all-ones when true, zero when false.
%tmp1 = sext <4 x i1> %tmp to <4 x i32>
|
|
; Reinterpret the four i32 lanes as sixteen i8 elements.
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
|
; One i1 per byte: set when the byte is negative, i.e. its sign bit is set.
%tmp3 = icmp slt <16 x i8> %tmp2, zeroinitializer
|
|
; Pack the sixteen i1 results into a single i16 mask.
%tmp4 = bitcast <16 x i1> %tmp3 to i16
|
|
; Keep only the low 8 mask bits; the upper 8 bits are discarded here.
%tmp5 = and i16 %tmp4, 255
|
|
; True when none of the kept mask bits is set.
%tmp6 = icmp eq i16 %tmp5, 0
|
|
; Widen the i1 to the i32 return value (0 or 1).
%tmp7 = zext i1 %tmp6 to i32
|
|
ret i32 %tmp7
|
|
}
|