Sanjay Patel 40a50f8701 [x86] avoid false dependency stall on 'sbb' with same source reg
This is effectively inverting the transform added with D116804
because the downside of the false dependency of something like
"sbb %eax, %eax" is much greater than the upside of eliminating
a zeroing instruction on (all?) Intel CPUs.

Differential Revision: https://reviews.llvm.org/D118843
2022-02-07 10:12:12 -05:00

22 lines
764 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - -mattr=avx512bw | FileCheck %s
define void @test3(i32 %c, <64 x i1>* %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: kmovd %ecx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
%cmp = icmp eq i32 %c, 0
%insert = insertelement <64 x i1> undef, i1 %cmp, i32 0
%shuf = shufflevector <64 x i1> %insert, <64 x i1> undef, <64 x i32> zeroinitializer
store <64 x i1> %shuf, <64 x i1>* %ptr
ret void
}