Sanjay Patel 892c731681 [Support] improve known bits analysis for leading zeros of multiply
Instead of summing leading zeros on the input operands, multiply the
max possible values of those inputs and count the leading zeros of
the result. This can give us an extra zero bit (typically in cases
where one of the operands is a known constant).

This allows folding away the remaining 'add' ops in the motivating
bug (modeled in the PhaseOrdering IR test):
https://github.com/llvm/llvm-project/issues/48399

Fixes #48399

Differential Revision: https://reviews.llvm.org/D115969
2021-12-20 09:10:50 -05:00

135 lines
4.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown | FileCheck %s --check-prefix=X86
; Full-width 128-bit multiply: returns %t * %u as an i128.
; On the x86_64 run the expected lowering is one widening mulq plus two
; imulq cross-products that are added into the high half (%rdx).
; On the i386 run the expected lowering is built from 32-bit mull/imull
; partial products with carry propagation (adcl/setb); the four 32-bit
; result words are stored through a pointer read from the stack, and that
; pointer is also copied to %eax before the 'retl $4'.
; The assertion lines below are autogenerated (see the NOTE at the top of
; the file); regenerate them with the script rather than editing by hand.
define i128 @foo(i128 %t, i128 %u) {
; X64-LABEL: foo:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: imulq %rdi, %rcx
; X64-NEXT: mulq %rdx
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: imulq %rsi, %r8
; X64-NEXT: addq %r8, %rdx
; X64-NEXT: retq
;
; X86-LABEL: foo:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 28
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: imull %ecx, %ebp
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl %ebp, %edi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: imull %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %esi, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: addl %esi, %ebp
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ebp, 4(%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
; The entire IR under test: a single 128-bit multiply of the two arguments.
%k = mul i128 %t, %u
ret i128 %k
}
; External 128-bit globals used by @PR13897 in this file: @bbb is the value
; that gets loaded and @aaa receives the computed result.
@aaa = external dso_local global i128
@bbb = external dso_local global i128
; Checks that an i128 multiply by a constant, combined with a shift and add,
; lowers without any multiply instruction.
; NOTE(review): the function name presumably refers to bug report PR13897;
; confirm against the bug tracker.
;
; With x = low 32 bits of the value loaded from @bbb, the IR computes
;   x * (2^64 + 2^32 + 1) + (loaded value << 96)
; 18446744078004518913 is 2^64 + 2^32 + 1, and the shift by 96 keeps only the
; low 32 bits of the loaded value in bits 96..127, so the four terms occupy
; disjoint 32-bit lanes. The result stored to @aaa is x replicated into all
; four 32-bit words, which is what both sets of expectations show: a
; shlq/orq pair and two 64-bit stores on x86_64, and four 32-bit stores of
; the same register on i386.
define void @PR13897() nounwind {
; X64-LABEL: PR13897:
; X64: # %bb.0: # %"0x0"
; X64-NEXT: movl bbb(%rip), %eax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: shlq $32, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rcx, aaa+8(%rip)
; X64-NEXT: movq %rcx, aaa(%rip)
; X64-NEXT: retq
;
; X86-LABEL: PR13897:
; X86: # %bb.0: # %"0x0"
; X86-NEXT: movl bbb, %eax
; X86-NEXT: movl %eax, aaa+12
; X86-NEXT: movl %eax, aaa+8
; X86-NEXT: movl %eax, aaa+4
; X86-NEXT: movl %eax, aaa
; X86-NEXT: retl
; The quoted block label below appears verbatim in the expected output
; comments above, so it must not be renamed.
"0x0":
%0 = load i128, i128* @bbb
; Keep only the low 32 bits (4294967295 == 0xFFFFFFFF).
%1 = and i128 %0, 4294967295
; Move the low 32 bits of the loaded value into bits 96..127.
%2 = shl i128 %0, 96
; Multiply by 2^64 + 2^32 + 1: copies the 32-bit value into bits 0, 32 and 64.
%3 = mul i128 %1, 18446744078004518913
; The lanes do not overlap, so this add cannot carry between them.
%4 = add i128 %3, %2
store i128 %4, i128* @aaa
ret void
}