llvm-project/llvm/test/CodeGen/X86/smul-with-overflow.ll
Craig Topper e30a4fc3e2
[TargetLowering] Improve one signature of forceExpandWideMUL. (#123991)
We have two forceExpandWideMUL functions. One takes the low and high
halves of two inputs and calculates the low and high halves of their
product. It does not calculate the full 2x-width product.

The other signature takes two inputs and calculates the low and high
halves of their full 2x-width product. Previously it did this by sign-
or zero-extending the inputs to create the high halves and then calling
the other function.

We can instead copy the algorithm from the other function and use the
Signed flag to determine whether we should do SRA or SRL. This avoids
the need to multiply the high parts of the inputs and add them to the
high half of the result, and it improves the generated code for signed
multiplication.
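
As a rough illustration (a minimal C sketch under assumed widths and
names, not the actual TargetLowering code), the expansion builds the
unsigned full product from half-width partial products and, when the
multiply is signed, fixes up the high half by conditionally subtracting
each operand based on the other operand's sign bit; that sign test is
where the SRA-vs-SRL choice comes in:

    #include <stdint.h>

    /* Sketch of expanding a widening multiply of two 32-bit values using
       only 32-bit operations; wide_mul_32 and its parameters are
       illustrative, not LLVM's API. */
    static void wide_mul_32(uint32_t a, uint32_t b, int is_signed,
                            uint32_t *lo, uint32_t *hi) {
      uint32_t a_lo = a & 0xffff, a_hi = a >> 16;
      uint32_t b_lo = b & 0xffff, b_hi = b >> 16;

      /* Schoolbook partial products of the 16-bit halves. */
      uint32_t p0 = a_lo * b_lo;
      uint32_t p1 = a_lo * b_hi;
      uint32_t p2 = a_hi * b_lo;
      uint32_t p3 = a_hi * b_hi;

      /* Recombine: low 32 bits and high 32 bits of the unsigned product. */
      uint32_t mid = (p0 >> 16) + (p1 & 0xffff) + (p2 & 0xffff);
      *lo = (mid << 16) | (p0 & 0xffff);
      *hi = p3 + (p1 >> 16) + (p2 >> 16) + (mid >> 16);

      if (is_signed) {
        /* The signed product differs from the unsigned one only in the high
           half: subtract b if a is negative and a if b is negative. The sign
           test is where an SRA (vs. SRL for unsigned) of the input is used. */
        *hi -= ((int32_t)a < 0 ? b : 0) + ((int32_t)b < 0 ? a : 0);
      }
    }

Called as wide_mul_32(x, y, /*is_signed=*/1, &lo, &hi), the pair
(hi, lo) then holds the 64-bit signed product of x and y.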

This should improve the performance of #123262. I don't know yet how
close we will get to GCC.
2025-01-23 12:49:35 -08:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK,X64
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
define i1 @test1(i32 %v1, i32 %v2) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: jno .LBB0_1
; X86-NEXT: # %bb.2: # %overflow
; X86-NEXT: movl $no, (%esp)
; X86-NEXT: calll printf@PLT
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; X86-NEXT: .LBB0_1: # %normal
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl $ok, (%esp)
; X86-NEXT: calll printf@PLT
; X86-NEXT: movb $1, %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: imull %esi, %eax
; X64-NEXT: jno .LBB0_1
; X64-NEXT: # %bb.2: # %overflow
; X64-NEXT: movl $no, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: callq printf@PLT
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
; X64-NEXT: .LBB0_1: # %normal
; X64-NEXT: movl $ok, %edi
; X64-NEXT: movl %eax, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: callq printf@PLT
; X64-NEXT: movb $1, %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
%t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
%sum = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
br i1 %obit, label %overflow, label %normal
normal:
%t1 = tail call i32 (ptr, ...) @printf( ptr @ok, i32 %sum ) nounwind
ret i1 true
overflow:
%t2 = tail call i32 (ptr, ...) @printf( ptr @no ) nounwind
ret i1 false
}
define i1 @test2(i32 %v1, i32 %v2) nounwind {
; X86-LABEL: test2:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: jno .LBB1_2
; X86-NEXT: # %bb.1: # %overflow
; X86-NEXT: movl $no, (%esp)
; X86-NEXT: calll printf@PLT
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; X86-NEXT: .LBB1_2: # %normal
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl $ok, (%esp)
; X86-NEXT: calll printf@PLT
; X86-NEXT: movb $1, %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rax
; X64-NEXT: movl %edi, %eax
; X64-NEXT: imull %esi, %eax
; X64-NEXT: jno .LBB1_2
; X64-NEXT: # %bb.1: # %overflow
; X64-NEXT: movl $no, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: callq printf@PLT
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
; X64-NEXT: .LBB1_2: # %normal
; X64-NEXT: movl $ok, %edi
; X64-NEXT: movl %eax, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: callq printf@PLT
; X64-NEXT: movb $1, %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
%t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
%sum = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
br i1 %obit, label %overflow, label %normal
overflow:
%t2 = tail call i32 (ptr, ...) @printf( ptr @no ) nounwind
ret i1 false
normal:
%t1 = tail call i32 (ptr, ...) @printf( ptr @ok, i32 %sum ) nounwind
ret i1 true
}
declare i32 @printf(ptr, ...) nounwind
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
; X86-LABEL: test3:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: test3:
; X64: # %bb.0: # %entry
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rsi), %eax
; X64-NEXT: addl %eax, %eax
; X64-NEXT: retq
entry:
%tmp0 = add i32 %b, %a
%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
%tmp2 = extractvalue { i32, i1 } %tmp1, 0
ret i32 %tmp2
}
define i32 @test4(i32 %a, i32 %b) nounwind readnone {
; X86-LABEL: test4:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $4, %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: test4:
; X64: # %bb.0: # %entry
; X64-NEXT: addl %esi, %edi
; X64-NEXT: imull $4, %edi, %eax
; X64-NEXT: retq
entry:
%tmp0 = add i32 %b, %a
%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
%tmp2 = extractvalue { i32, i1 } %tmp1, 0
ret i32 %tmp2
}
declare { i63, i1 } @llvm.smul.with.overflow.i63(i63, i63) nounwind readnone
; Was returning false, should return true (not constant folded yet though).
; PR13991
define i1 @test5() nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: ret{{[l|q]}}
entry:
%res = call { i63, i1 } @llvm.smul.with.overflow.i63(i63 4, i63 4611686018427387903)
%sum = extractvalue { i63, i1 } %res, 0
%overflow = extractvalue { i63, i1 } %res, 1
ret i1 %overflow
}
declare { i129, i1 } @llvm.smul.with.overflow.i129(i129, i129)
define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-LABEL: smul_ovf:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $108, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: addl %eax, %ebp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edx, %esi
; X86-NEXT: setb %al
; X86-NEXT: addl %edi, %esi
; X86-NEXT: movzbl %al, %edi
; X86-NEXT: adcl %edx, %edi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: adcl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %edx, %ebp
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %eax, %ebp
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: setb %al
; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: adcl $0, %eax
; X86-NEXT: addl %esi, %edx
; X86-NEXT: adcl %edi, %eax
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: adcl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: addl %ebp, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ecx, %edi
; X86-NEXT: adcl %ebp, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: adcl %edi, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl %esi, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ecx, %edi
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl %edi, %ecx
; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %eax, %ebp
; X86-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edi, %esi
; X86-NEXT: setb (%esp) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %esi, %ebx
; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %edi
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %ebp, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %ebp, %esi
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: adcl %eax, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %esi
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %ebx
; X86-NEXT: setb %al
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: adcl %edx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl %eax, %edi
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: setb %al
; X86-NEXT: addl %ebp, %ecx
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: adcl %edx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: addl %ecx, %edi
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %eax
; X86-NEXT: addl %esi, %edx
; X86-NEXT: adcl %ebp, %eax
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: addl %edi, %edx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ebx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: addl %ecx, %edi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: addl %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebp, %edi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: addl %esi, %edx
; X86-NEXT: adcl %eax, %ebp
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: adcl %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: adcl %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: adcl %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %eax, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl %eax, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: setb %bl
; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movzbl %bl, %ebx
; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %esi, %edi
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: adcl $0, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %edi
; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: addl %ebp, %esi
; X86-NEXT: adcl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: adcl %ebp, %edi
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: adcl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: addl %eax, %eax
; X86-NEXT: adcl %edx, %edx
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: addl %ebx, %ebx
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: adcl %ebp, %ecx
; X86-NEXT: adcl %eax, %eax
; X86-NEXT: adcl %edx, %edx
; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl (%esp), %edi # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sarl $31, %ebp
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: xorl %ebp, %esi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: xorl %ebp, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: orl %eax, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: xorl %ebp, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: xorl %ebp, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: xorl (%esp), %ebp # 4-byte Folded Reload
; X86-NEXT: orl %edx, %ebp
; X86-NEXT: orl %eax, %ebp
; X86-NEXT: orl %ebx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: andl $1, %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: xorl %eax, %esi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl %edx, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: orl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movb %cl, 16(%eax)
; X86-NEXT: setne 32(%eax)
; X86-NEXT: addl $108, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: smul_ovf:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rcx, %r14
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
; X64-NEXT: andl $1, %r13d
; X64-NEXT: negq %r13
; X64-NEXT: andl $1, %r14d
; X64-NEXT: negq %r14
; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq %rax, %r12
; X64-NEXT: addq %rdx, %r12
; X64-NEXT: adcq $0, %r11
; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: addq %rax, %r12
; X64-NEXT: adcq %rdx, %r11
; X64-NEXT: setb %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: addq %rax, %r11
; X64-NEXT: adcq %rdx, %rcx
; X64-NEXT: addq %rdi, %r11
; X64-NEXT: adcq %r12, %rcx
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r15, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: addq %r10, %rbp
; X64-NEXT: adcq $0, %r8
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: addq %rbp, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r8, %rsi
; X64-NEXT: setb %al
; X64-NEXT: movzbl %al, %ebp
; X64-NEXT: movq %r15, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r10
; X64-NEXT: addq %rsi, %r10
; X64-NEXT: adcq %rbp, %r8
; X64-NEXT: addq %rdi, %r10
; X64-NEXT: adcq %r12, %r8
; X64-NEXT: adcq $0, %r11
; X64-NEXT: adcq $0, %rcx
; X64-NEXT: movq %r13, %rax
; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r9
; X64-NEXT: movq %r15, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rax, %rbx
; X64-NEXT: addq %rsi, %rbx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: adcq $0, %r15
; X64-NEXT: addq %r9, %rbx
; X64-NEXT: adcq %rsi, %r15
; X64-NEXT: setb %sil
; X64-NEXT: movzbl %sil, %esi
; X64-NEXT: addq %rax, %r15
; X64-NEXT: adcq %rdx, %rsi
; X64-NEXT: addq %r9, %r15
; X64-NEXT: adcq %rbx, %rsi
; X64-NEXT: addq %r9, %r10
; X64-NEXT: adcq %r8, %rbx
; X64-NEXT: adcq $0, %r15
; X64-NEXT: adcq $0, %rsi
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: sarq $63, %rax
; X64-NEXT: movq %rcx, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: addq %r11, %r15
; X64-NEXT: adcq %rcx, %rsi
; X64-NEXT: movq %rdi, %r9
; X64-NEXT: adcq %rax, %r9
; X64-NEXT: adcq %rax, %rdi
; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rax, %r8
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: addq %rdx, %rcx
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: adcq $0, %r11
; X64-NEXT: addq %rax, %rcx
; X64-NEXT: adcq %rdx, %r11
; X64-NEXT: setb %al
; X64-NEXT: addq %r8, %r11
; X64-NEXT: movzbl %al, %r12d
; X64-NEXT: adcq %rdx, %r12
; X64-NEXT: movq %r13, %rax
; X64-NEXT: imulq %r14
; X64-NEXT: addq %rax, %rax
; X64-NEXT: adcq %rdx, %rdx
; X64-NEXT: addq %r11, %rax
; X64-NEXT: adcq %r12, %rdx
; X64-NEXT: addq %r8, %r15
; X64-NEXT: adcq %rsi, %rcx
; X64-NEXT: adcq %r9, %rax
; X64-NEXT: adcq %rdi, %rdx
; X64-NEXT: movq %rbx, %rsi
; X64-NEXT: sarq $63, %rsi
; X64-NEXT: xorq %rsi, %rax
; X64-NEXT: xorq %rsi, %r15
; X64-NEXT: orq %rax, %r15
; X64-NEXT: xorq %rsi, %rdx
; X64-NEXT: xorq %rcx, %rsi
; X64-NEXT: orq %rdx, %rsi
; X64-NEXT: orq %r15, %rsi
; X64-NEXT: movl %r10d, %edx
; X64-NEXT: andl $1, %edx
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: negq %rcx
; X64-NEXT: xorq %rcx, %rbx
; X64-NEXT: xorq %r10, %rcx
; X64-NEXT: orq %rbx, %rcx
; X64-NEXT: orq %rsi, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, 8(%rax)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, (%rax)
; X64-NEXT: movb %dl, 16(%rax)
; X64-NEXT: setne 32(%rax)
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%r = tail call { i129, i1 } @llvm.smul.with.overflow.i129(i129 %x, i129 %y)
ret { i129, i1 } %r
}