llvm-project/llvm/test/CodeGen/X86/known-bits.ll
Craig Topper 8b5f2ab2a4 Recommit r367901 "[X86] Enable -x86-experimental-vector-widening-legalization by default."
The assert that caused this to be reverted should be fixed now.

Original commit message:

This patch changes our default legalization behavior for 16, 32, and
64 bit vectors with i8/i16/i32/i64 scalar types from promotion to
widening. For example, v8i8 will now be widened to v16i8 instead of
promoted to v8i16. This keeps the element widths the same and pads
with undef elements. We believe this is a better legalization strategy.
But it carries some issues due to the fragmented vector ISA. For
example, i8 shifts and multiplies get widened and then later have
to be promoted/split into vXi16 vectors.

This has the potential to cause regressions so we wanted to get
it in early in the 10.0 cycle so we have plenty of time to
address them.

Next steps will be to merge tests that explicitly test the command
line option. And then we can remove the option and its associated
code.

llvm-svn: 368183
2019-08-07 16:24:26 +00:00

278 lines
8.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
; knownbits_zext_in_reg: loads one byte, divides it (unsigned) by two
; constants, and feeds both quotients into a <4 x i8> srem inside a loop nest
; that never returns (the function has no ret).
; The expected code for both targets performs the remainder with a
; zero-extending move (movzbl) followed by an unsigned 8-bit divide (divb).
; NOTE(review): this presumably relies on known-bits analysis proving that the
; sign bits of both quotients are zero - confirm against the original commit.
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32: # %bb.0: # %BB
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl (%eax), %ecx
; X32-NEXT: imull $101, %ecx, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: imull $177, %ecx, %edx
; X32-NEXT: shrl $14, %edx
; X32-NEXT: movzbl %al, %ecx
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %CF
; X32-NEXT: # =>This Loop Header: Depth=1
; X32-NEXT: # Child Loop BB0_2 Depth 2
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: divb %dl
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_2: # %CF237
; X32-NEXT: # Parent Loop BB0_1 Depth=1
; X32-NEXT: # => This Inner Loop Header: Depth=2
; X32-NEXT: testb %bl, %bl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64: # %bb.0: # %BB
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: imull $101, %eax, %ecx
; X64-NEXT: shrl $14, %ecx
; X64-NEXT: imull $177, %eax, %edx
; X64-NEXT: shrl $14, %edx
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %CF
; X64-NEXT: # =>This Loop Header: Depth=1
; X64-NEXT: # Child Loop BB0_2 Depth 2
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: divb %dl
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_2: # %CF237
; X64-NEXT: # Parent Loop BB0_1 Depth=1
; X64-NEXT: # => This Inner Loop Header: Depth=2
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: jmp .LBB0_1
BB:
%L5 = load i8, i8* %0
; The condition is the constant true, so this select always yields %L5.
%Sl9 = select i1 true, i8 %L5, i8 undef
; i8 -93 is 163 when read as unsigned, so this quotient is at most 255/163 = 1.
%B21 = udiv i8 %Sl9, -93
; Unsigned quotient is at most 255/93 = 2.
%B22 = udiv i8 %Sl9, 93
br label %CF
CF: ; preds = %CF246, %BB
; Only lane 1 of each vector carries a quotient; lanes 0, 2 and 3 stay zero
; (including in the divisor %I41).
%I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
%I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
%B41 = srem <4 x i8> %I40, %I41
br label %CF237
CF237: ; preds = %CF237, %CF
; The branch condition is built from undef operands; the expected code tests a
; register that was zeroed with xorl before the loop.
%Cmp73 = icmp ne i1 undef, undef
br i1 %Cmp73, label %CF237, label %CF246
CF246: ; preds = %CF237
%Cmp117 = icmp ult <4 x i8> %B41, undef
; %E156 is not referenced anywhere else in this function.
%E156 = extractelement <4 x i1> %Cmp117, i32 2
br label %CF
}
; knownbits_mask_add_lshr: masks both arguments to 15 bits, adds them, and
; shifts the sum right by 17. The sum is at most 32767 + 32766 = 65533, which
; is below 2^17, so every bit that survives the shift is zero. The expected
; code for both targets is a constant-zero return (xorl %eax, %eax).
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32: # %bb.0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
; 32767 = 0x7FFF keeps bits 0-14; 32766 = 0x7FFE keeps bits 1-14.
%1 = and i32 %a0, 32767
%2 = and i32 %a1, 32766
%3 = add i32 %1, %2
; Bits 17 and above of the sum are zero, so this shift yields 0.
%4 = lshr i32 %3, 17
ret i32 %4
}
; knownbits_mask_addc_shl: builds a 128-bit sum from two 64-bit values whose
; low 10 bits are masked off plus %a2 placed in the upper 64 bits, then shifts
; the sum left by 54.
; Bits 0-9 of both masked addends are zero and %a2 only contributes at bit 64
; and above, so bits 0-9 of the i128 sum are zero. After the shift by 54 the
; low 64 bits of the result are therefore all zero.
; The expected 64-bit code zeroes %eax with xorl, and the expected 32-bit code
; stores the immediate 0 to the two lowest 32-bit words of the result.
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32: # %bb.0:
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl $-1024, %esi # imm = 0xFC00
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: andl %esi, %edi
; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
; X32-NEXT: addl %edi, %esi
; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: shldl $22, %edx, %ecx
; X32-NEXT: shldl $22, %esi, %edx
; X32-NEXT: movl %edx, 8(%eax)
; X32-NEXT: movl %ecx, 12(%eax)
; X32-NEXT: movl $0, 4(%eax)
; X32-NEXT: movl $0, (%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64: # %bb.0:
; X64-NEXT: andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT: andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: adcl $0, %edx
; X64-NEXT: shldq $54, %rsi, %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
; -1024 clears the low 10 bits of each 64-bit input before widening.
%1 = and i64 %a0, -1024
%2 = zext i64 %1 to i128
%3 = and i64 %a1, -1024
%4 = zext i64 %3 to i128
%5 = add i128 %2, %4
; %a2 is moved into bits 64-127, so it cannot affect the low 64 bits of the sum.
%6 = zext i64 %a2 to i128
%7 = shl i128 %6, 64
%8 = add i128 %5, %7
; Result bits 54-63 come from sum bits 0-9, which are zero.
%9 = shl i128 %8, 54
ret i128 %9
}
; knownbits_uaddo_saddo: shifts both inputs left by 32, adds them with both the
; unsigned and the signed overflow-checking intrinsics, and returns the
; truncated sum of the two value results together with the OR of the two
; overflow flags.
; Both operands have their low 32 bits clear, so the low 32 bits of each value
; result (and of their sum) are zero and the i32 field is the constant 0. The
; expected code for both targets zeroes %eax with xorl and leaves the combined
; overflow flag in %dl.
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %eax, %edx
; X32-NEXT: setb %bl
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: sete %al
; X32-NEXT: testl %edx, %edx
; X32-NEXT: setns %dl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %al, %dl
; X32-NEXT: orb %bl, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: addq %rdi, %rsi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
; After these shifts the low 32 bits of %1 and %2 are zero.
%1 = shl i64 %a0, 32
%2 = shl i64 %a1, 32
%u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
%uval = extractvalue {i64, i1} %u, 0
%uovf = extractvalue {i64, i1} %u, 1
%s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
%sval = extractvalue {i64, i1} %s, 0
%sovf = extractvalue {i64, i1} %s, 1
%sum = add i64 %uval, %sval
; Only the low 32 bits are kept, and those are zero.
%3 = trunc i64 %sum to i32
%4 = or i1 %uovf, %sovf
%ret0 = insertvalue {i32, i1} undef, i32 %3, 0
%ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
ret {i32, i1} %ret1
}
; knownbits_usubo_ssubo: shifts both inputs left by 32, subtracts them with
; both the unsigned and the signed overflow-checking intrinsics, and returns
; the truncated sum of the two value results together with the OR of the two
; overflow flags.
; Both operands have their low 32 bits clear, so the low 32 bits of each
; difference (and of their sum) are zero and the i32 field is the constant 0.
; The expected code for both targets zeroes %eax with xorl and leaves the
; combined overflow flag in %dl; the 64-bit code only needs a cmpq because the
; difference value itself is never used.
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: subl %eax, %edx
; X32-NEXT: setb %bl
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: setne %al
; X32-NEXT: testl %edx, %edx
; X32-NEXT: setns %dl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %al, %dl
; X32-NEXT: orb %bl, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64: # %bb.0:
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setb %al
; X64-NEXT: seto %dl
; X64-NEXT: orb %al, %dl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
; After these shifts the low 32 bits of %1 and %2 are zero.
%1 = shl i64 %a0, 32
%2 = shl i64 %a1, 32
%u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
%uval = extractvalue {i64, i1} %u, 0
%uovf = extractvalue {i64, i1} %u, 1
%s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
%sval = extractvalue {i64, i1} %s, 0
%sovf = extractvalue {i64, i1} %s, 1
%sum = add i64 %uval, %sval
; Only the low 32 bits are kept, and those are zero.
%3 = trunc i64 %sum to i32
%4 = or i1 %uovf, %sovf
%ret0 = insertvalue {i32, i1} undef, i32 %3, 0
%ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
ret {i32, i1} %ret1
}
; Overflow-checking i64 add/sub intrinsics called by knownbits_uaddo_saddo and
; knownbits_usubo_ssubo; each returns the {value, overflow-flag} pair.
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
; knownbits_fshl: funnel-shifts %a0 together with an all-ones second operand
; left by 5 and masks the result with 3.
; fshl shifts the 64-bit concatenation %a0:-1 left by 5 and keeps the upper
; 32 bits, so the low 5 result bits come from the all-ones operand and are
; known to be 1. The masked value is therefore the constant 3, which is what
; the expected code for both targets returns (movl $3, %eax).
define i32 @knownbits_fshl(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshl:
; X32: # %bb.0:
; X32-NEXT: movl $3, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_fshl:
; X64: # %bb.0:
; X64-NEXT: movl $3, %eax
; X64-NEXT: retq
%1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
; Bits 0 and 1 of %1 are both 1, so the mask keeps exactly 3.
%2 = and i32 %1, 3
ret i32 %2
}
; knownbits_fshr: funnel-shifts %a0 together with an all-ones second operand
; right by 5 and masks the result with 3.
; fshr shifts the 64-bit concatenation %a0:-1 right by 5 and keeps the lower
; 32 bits, so the low 27 result bits come from the all-ones operand and are
; known to be 1. The masked value is therefore the constant 3, which is what
; the expected code for both targets returns (movl $3, %eax).
define i32 @knownbits_fshr(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshr:
; X32: # %bb.0:
; X32-NEXT: movl $3, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_fshr:
; X64: # %bb.0:
; X64-NEXT: movl $3, %eax
; X64-NEXT: retq
%1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
; Bits 0 and 1 of %1 are both 1, so the mask keeps exactly 3.
%2 = and i32 %1, 3
ret i32 %2
}
; Funnel-shift intrinsics called by knownbits_fshl and knownbits_fshr.
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone