The assert that caused this to be reverted should be fixed now. Original commit message: This patch changes our default legalization behavior for 16, 32, and 64 bit vectors with i8/i16/i32/i64 scalar types from promotion to widening. For example, v8i8 will now be widened to v16i8 instead of promoted to v8i16. This keeps the element widths the same and pads with undef elements. We believe this is a better legalization strategy. But it carries some issues due to the fragmented vector ISA. For example, i8 shifts and multiplies get widened and then later have to be promoted/split into vXi16 vectors. This has the potential to cause regressions so we wanted to get it in early in the 10.0 cycle so we have plenty of time to address them. Next steps will be to merge tests that explicitly test the command line option. And then we can remove the option and its associated code. llvm-svn: 368183
278 lines
8.4 KiB
LLVM
278 lines
8.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
|
|
|
|
; Known-bits test: zero-extended i8 quotients feeding a vector srem.
; The function loads one byte, divides it (unsigned) by the i8 constants -93
; and 93, places each quotient in lane 1 of an otherwise-zero <4 x i8> vector
; and takes the srem of the two vectors inside a loop nest that never returns.
; The expected assembly for both triples performs the two constant divisions
; as imull + shrl $14 and the remainder as a single unsigned byte divide
; (divb), with no vector instructions.
; NOTE(review): the unsigned divb is presumably chosen because both srem
; operands are known to be non-negative - confirm against the DAG combiner.
define void @knownbits_zext_in_reg(i8*) nounwind {
|
|
; X32-LABEL: knownbits_zext_in_reg:
|
|
; X32: # %bb.0: # %BB
|
|
; X32-NEXT: pushl %ebx
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movzbl (%eax), %ecx
|
|
; X32-NEXT: imull $101, %ecx, %eax
|
|
; X32-NEXT: shrl $14, %eax
|
|
; X32-NEXT: imull $177, %ecx, %edx
|
|
; X32-NEXT: shrl $14, %edx
|
|
; X32-NEXT: movzbl %al, %ecx
|
|
; X32-NEXT: xorl %ebx, %ebx
|
|
; X32-NEXT: .p2align 4, 0x90
|
|
; X32-NEXT: .LBB0_1: # %CF
|
|
; X32-NEXT: # =>This Loop Header: Depth=1
|
|
; X32-NEXT: # Child Loop BB0_2 Depth 2
|
|
; X32-NEXT: movl %ecx, %eax
|
|
; X32-NEXT: divb %dl
|
|
; X32-NEXT: .p2align 4, 0x90
|
|
; X32-NEXT: .LBB0_2: # %CF237
|
|
; X32-NEXT: # Parent Loop BB0_1 Depth=1
|
|
; X32-NEXT: # => This Inner Loop Header: Depth=2
|
|
; X32-NEXT: testb %bl, %bl
|
|
; X32-NEXT: jne .LBB0_2
|
|
; X32-NEXT: jmp .LBB0_1
|
|
;
|
|
; X64-LABEL: knownbits_zext_in_reg:
|
|
; X64: # %bb.0: # %BB
|
|
; X64-NEXT: movzbl (%rdi), %eax
|
|
; X64-NEXT: imull $101, %eax, %ecx
|
|
; X64-NEXT: shrl $14, %ecx
|
|
; X64-NEXT: imull $177, %eax, %edx
|
|
; X64-NEXT: shrl $14, %edx
|
|
; X64-NEXT: movzbl %cl, %ecx
|
|
; X64-NEXT: xorl %esi, %esi
|
|
; X64-NEXT: .p2align 4, 0x90
|
|
; X64-NEXT: .LBB0_1: # %CF
|
|
; X64-NEXT: # =>This Loop Header: Depth=1
|
|
; X64-NEXT: # Child Loop BB0_2 Depth 2
|
|
; X64-NEXT: movl %ecx, %eax
|
|
; X64-NEXT: divb %dl
|
|
; X64-NEXT: .p2align 4, 0x90
|
|
; X64-NEXT: .LBB0_2: # %CF237
|
|
; X64-NEXT: # Parent Loop BB0_1 Depth=1
|
|
; X64-NEXT: # => This Inner Loop Header: Depth=2
|
|
; X64-NEXT: testb %sil, %sil
|
|
; X64-NEXT: jne .LBB0_2
|
|
; X64-NEXT: jmp .LBB0_1
|
|
BB:
|
|
%L5 = load i8, i8* %0
|
|
; The condition is the constant true, so this select always yields %L5.
%Sl9 = select i1 true, i8 %L5, i8 undef
|
|
; Interpreted as an unsigned i8 divisor, -93 is 163.
%B21 = udiv i8 %Sl9, -93
|
|
%B22 = udiv i8 %Sl9, 93
|
|
br label %CF
|
|
|
|
CF: ; preds = %CF246, %BB
|
|
; Only lane 1 of each vector is non-constant; the other lanes stay zero.
%I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
|
|
%I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
|
|
%B41 = srem <4 x i8> %I40, %I41
|
|
br label %CF237
|
|
|
|
CF237: ; preds = %CF237, %CF
|
|
; Both compare operands are undef, so the inner-loop exit condition has no
; fixed value.
%Cmp73 = icmp ne i1 undef, undef
|
|
br i1 %Cmp73, label %CF237, label %CF246
|
|
|
|
CF246: ; preds = %CF237
|
|
%Cmp117 = icmp ult <4 x i8> %B41, undef
|
|
; %E156 is not referenced by any later instruction in this function.
%E156 = extractelement <4 x i1> %Cmp117, i32 2
|
|
br label %CF
|
|
}
|
|
|
|
; Known-bits test: add of two masked values followed by a logical shift right.
; %a0 is masked to at most 32767 and %a1 to at most 32766, so their sum is at
; most 65533, which is below 2^17; shifting right by 17 therefore always gives
; zero. Both triples are expected to return a constant zero (xorl %eax, %eax).
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
|
|
; X32-LABEL: knownbits_mask_add_lshr:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: xorl %eax, %eax
|
|
; X32-NEXT: retl
|
|
;
|
|
; X64-LABEL: knownbits_mask_add_lshr:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: xorl %eax, %eax
|
|
; X64-NEXT: retq
|
|
%1 = and i32 %a0, 32767
|
|
%2 = and i32 %a1, 32766
|
|
%3 = add i32 %1, %2
|
|
; The sum above never reaches bit 17, so this shift discards every set bit.
%4 = lshr i32 %3, 17
|
|
ret i32 %4
|
|
}
|
|
|
|
; Known-bits test: 128-bit add with carry followed by a left shift.
; The low 10 bits of %a0 and %a1 are cleared (and with -1024) before both are
; zero-extended to i128 and added; %a2 is shifted into the upper 64 bits and
; added as well. The low 10 bits of the sum are therefore zero, and after the
; shift left by 54 the low 64 bits of the result are all zero.
; The expected assembly reflects that: the i686 output stores constant 0 at
; offsets 0 and 4 from the pointer in %eax (presumably the low 64 bits of the
; returned i128 - confirm against the calling convention), and the x86_64
; output zeroes %eax with xorl.
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
|
|
; X32-LABEL: knownbits_mask_addc_shl:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: pushl %edi
|
|
; X32-NEXT: pushl %esi
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
; X32-NEXT: movl $-1024, %esi # imm = 0xFC00
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
; X32-NEXT: andl %esi, %edi
|
|
; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
|
|
; X32-NEXT: addl %edi, %esi
|
|
; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
|
; X32-NEXT: adcl $0, %ecx
|
|
; X32-NEXT: shldl $22, %edx, %ecx
|
|
; X32-NEXT: shldl $22, %esi, %edx
|
|
; X32-NEXT: movl %edx, 8(%eax)
|
|
; X32-NEXT: movl %ecx, 12(%eax)
|
|
; X32-NEXT: movl $0, 4(%eax)
|
|
; X32-NEXT: movl $0, (%eax)
|
|
; X32-NEXT: popl %esi
|
|
; X32-NEXT: popl %edi
|
|
; X32-NEXT: retl $4
|
|
;
|
|
; X64-LABEL: knownbits_mask_addc_shl:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: andq $-1024, %rdi # imm = 0xFC00
|
|
; X64-NEXT: andq $-1024, %rsi # imm = 0xFC00
|
|
; X64-NEXT: addq %rdi, %rsi
|
|
; X64-NEXT: adcl $0, %edx
|
|
; X64-NEXT: shldq $54, %rsi, %rdx
|
|
; X64-NEXT: xorl %eax, %eax
|
|
; X64-NEXT: retq
|
|
; Clear the low 10 bits of each 64-bit addend before widening.
%1 = and i64 %a0, -1024
|
|
%2 = zext i64 %1 to i128
|
|
%3 = and i64 %a1, -1024
|
|
%4 = zext i64 %3 to i128
|
|
%5 = add i128 %2, %4
|
|
; Place %a2 in bits 64..127 so it only contributes to the upper half.
%6 = zext i64 %a2 to i128
|
|
%7 = shl i128 %6, 64
|
|
%8 = add i128 %5, %7
|
|
; Bits 0..9 of %8 are zero, so bits 0..63 of the shifted value are zero.
%9 = shl i128 %8, 54
|
|
ret i128 %9
|
|
}
|
|
|
|
; Known-bits test: unsigned and signed add-with-overflow on shifted operands.
; Both arguments are shifted left by 32, so the low 32 bits of each operand are
; zero. The same holds for both intrinsic sums and for their total, so the
; truncation to i32 is always zero; the expected assembly for both triples
; produces it with xorl %eax, %eax. The i1 field of the result is the OR of
; the unsigned and signed overflow flags.
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
|
|
; X32-LABEL: knownbits_uaddo_saddo:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: pushl %ebx
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
|
; X32-NEXT: movl %ecx, %edx
|
|
; X32-NEXT: addl %eax, %edx
|
|
; X32-NEXT: setb %bl
|
|
; X32-NEXT: testl %eax, %eax
|
|
; X32-NEXT: setns %al
|
|
; X32-NEXT: testl %ecx, %ecx
|
|
; X32-NEXT: setns %cl
|
|
; X32-NEXT: cmpb %al, %cl
|
|
; X32-NEXT: sete %al
|
|
; X32-NEXT: testl %edx, %edx
|
|
; X32-NEXT: setns %dl
|
|
; X32-NEXT: cmpb %dl, %cl
|
|
; X32-NEXT: setne %dl
|
|
; X32-NEXT: andb %al, %dl
|
|
; X32-NEXT: orb %bl, %dl
|
|
; X32-NEXT: xorl %eax, %eax
|
|
; X32-NEXT: popl %ebx
|
|
; X32-NEXT: retl
|
|
;
|
|
; X64-LABEL: knownbits_uaddo_saddo:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: shlq $32, %rdi
|
|
; X64-NEXT: shlq $32, %rsi
|
|
; X64-NEXT: addq %rdi, %rsi
|
|
; X64-NEXT: setb %al
|
|
; X64-NEXT: seto %dl
|
|
; X64-NEXT: orb %al, %dl
|
|
; X64-NEXT: xorl %eax, %eax
|
|
; X64-NEXT: retq
|
|
; After these shifts the low 32 bits of both operands are zero.
%1 = shl i64 %a0, 32
|
|
%2 = shl i64 %a1, 32
|
|
%u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
|
|
%uval = extractvalue {i64, i1} %u, 0
|
|
%uovf = extractvalue {i64, i1} %u, 1
|
|
%s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
|
|
%sval = extractvalue {i64, i1} %s, 0
|
|
%sovf = extractvalue {i64, i1} %s, 1
|
|
%sum = add i64 %uval, %sval
|
|
; Only the low 32 bits survive the truncation, and those are all zero.
%3 = trunc i64 %sum to i32
|
|
%4 = or i1 %uovf, %sovf
|
|
%ret0 = insertvalue {i32, i1} undef, i32 %3, 0
|
|
%ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
|
|
ret {i32, i1} %ret1
|
|
}
|
|
|
|
; Known-bits test: unsigned and signed sub-with-overflow on shifted operands.
; Both arguments are shifted left by 32, so the low 32 bits of each operand are
; zero. The same holds for both intrinsic differences and for their sum, so
; the truncation to i32 is always zero; the expected assembly for both triples
; produces it with xorl %eax, %eax. The i1 field of the result is the OR of
; the unsigned and signed overflow flags.
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
|
|
; X32-LABEL: knownbits_usubo_ssubo:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: pushl %ebx
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
|
; X32-NEXT: movl %ecx, %edx
|
|
; X32-NEXT: subl %eax, %edx
|
|
; X32-NEXT: setb %bl
|
|
; X32-NEXT: testl %eax, %eax
|
|
; X32-NEXT: setns %al
|
|
; X32-NEXT: testl %ecx, %ecx
|
|
; X32-NEXT: setns %cl
|
|
; X32-NEXT: cmpb %al, %cl
|
|
; X32-NEXT: setne %al
|
|
; X32-NEXT: testl %edx, %edx
|
|
; X32-NEXT: setns %dl
|
|
; X32-NEXT: cmpb %dl, %cl
|
|
; X32-NEXT: setne %dl
|
|
; X32-NEXT: andb %al, %dl
|
|
; X32-NEXT: orb %bl, %dl
|
|
; X32-NEXT: xorl %eax, %eax
|
|
; X32-NEXT: popl %ebx
|
|
; X32-NEXT: retl
|
|
;
|
|
; X64-LABEL: knownbits_usubo_ssubo:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: shlq $32, %rdi
|
|
; X64-NEXT: shlq $32, %rsi
|
|
; X64-NEXT: cmpq %rsi, %rdi
|
|
; X64-NEXT: setb %al
|
|
; X64-NEXT: seto %dl
|
|
; X64-NEXT: orb %al, %dl
|
|
; X64-NEXT: xorl %eax, %eax
|
|
; X64-NEXT: retq
|
|
; After these shifts the low 32 bits of both operands are zero.
%1 = shl i64 %a0, 32
|
|
%2 = shl i64 %a1, 32
|
|
%u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
|
|
%uval = extractvalue {i64, i1} %u, 0
|
|
%uovf = extractvalue {i64, i1} %u, 1
|
|
%s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
|
|
%sval = extractvalue {i64, i1} %s, 0
|
|
%sovf = extractvalue {i64, i1} %s, 1
|
|
%sum = add i64 %uval, %sval
|
|
; Only the low 32 bits survive the truncation, and those are all zero.
%3 = trunc i64 %sum to i32
|
|
%4 = or i1 %uovf, %sovf
|
|
%ret0 = insertvalue {i32, i1} undef, i32 %3, 0
|
|
%ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
|
|
ret {i32, i1} %ret1
|
|
}
|
|
|
|
; Declarations of the 64-bit add/sub-with-overflow intrinsics called by
; knownbits_uaddo_saddo and knownbits_usubo_ssubo. Each returns a
; {result, overflow-flag} pair.
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
|
|
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
|
|
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
|
|
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
|
|
|
|
; Known-bits test: funnel shift left with an all-ones second operand.
; fshl(%a0, -1, 5) shifts the 64-bit concatenation %a0:-1 left by 5 and keeps
; the high 32 bits, so the low 5 bits of the result come from the all-ones
; operand. Masking with 3 therefore always yields 3, and both triples are
; expected to return the constant 3 (movl $3, %eax).
define i32 @knownbits_fshl(i32 %a0) nounwind {
|
|
; X32-LABEL: knownbits_fshl:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: movl $3, %eax
|
|
; X32-NEXT: retl
|
|
;
|
|
; X64-LABEL: knownbits_fshl:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: movl $3, %eax
|
|
; X64-NEXT: retq
|
|
%1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
|
|
; Bits 0 and 1 of %1 are known ones, so the mask result is the constant 3.
%2 = and i32 %1, 3
|
|
ret i32 %2
|
|
}
|
|
|
|
; Known-bits test: funnel shift right with an all-ones second operand.
; fshr(%a0, -1, 5) shifts the 64-bit concatenation %a0:-1 right by 5 and keeps
; the low 32 bits, so the low 27 bits of the result come from the all-ones
; operand. Masking with 3 therefore always yields 3, and both triples are
; expected to return the constant 3 (movl $3, %eax).
define i32 @knownbits_fshr(i32 %a0) nounwind {
|
|
; X32-LABEL: knownbits_fshr:
|
|
; X32: # %bb.0:
|
|
; X32-NEXT: movl $3, %eax
|
|
; X32-NEXT: retl
|
|
;
|
|
; X64-LABEL: knownbits_fshr:
|
|
; X64: # %bb.0:
|
|
; X64-NEXT: movl $3, %eax
|
|
; X64-NEXT: retq
|
|
%1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
|
|
; Bits 0 and 1 of %1 are known ones, so the mask result is the constant 3.
%2 = and i32 %1, 3
|
|
ret i32 %2
|
|
}
|
|
|
|
; Declarations of the 32-bit funnel-shift intrinsics called by knownbits_fshl
; and knownbits_fshr. Operands are (high word, low word, shift amount).
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
|
|
|
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone
|