llvm-project/llvm/test/CodeGen/X86/addsub-constant-folding.ll
Guozhi Wei 6599961c17 [TwoAddressInstructionPass] Improve the SrcRegMap and DstRegMap computation
This patch contains the following enhancements to SrcRegMap and DstRegMap:

  1 In findOnlyInterestingUse, check not only whether the Reg is a two-address
    use, but also whether it can become a two-address use after commutation.

  2 If a physical register is clobbered, remove SrcRegMap entries that are
    mapped to it.

  3 In processTiedPairs, when creating a new COPY instruction, add a SrcRegMap
    entry only when the COPY instruction is coalescable (i.e. the COPY src is
    killed).

With these enhancements, isProfitableToCommute can make better commute decisions,
and ultimately more register copies are removed.

Differential Revision: https://reviews.llvm.org/D108731
2021-10-11 15:28:31 -07:00

1144 lines
35 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; External functions declared without a body here. The *_extrause tests pass
; the intermediate value %t0 to them so that it has a second use.
declare void @use(i32 %arg)
declare void @vec_use(<4 x i32> %arg)
; (x+c1)+c2
; Scalar i32: (%arg + 8) + 2, intermediate has a single use.
; The checks expect one add/lea using the combined constant 10.
define i32 @add_const_add_const(i32 %arg) {
; X86-LABEL: add_const_add_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $10, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_const_add_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 10(%rdi), %eax
; X64-NEXT: retq
%t0 = add i32 %arg, 8
%t1 = add i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (%arg + 8) + 2, where the intermediate %t0 is also passed to @use.
; The checks expect %arg + 8 for the call and %arg + 10 (computed from the
; saved %arg) for the return value.
define i32 @add_const_add_const_extrause(i32 %arg) {
; X86-LABEL: add_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $10, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $10, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add i32 %arg, 8
call void @use(i32 %t0)
%t1 = add i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (%arg + 8) + 2, intermediate has a single use.
; The checks expect a single paddd with a constant-pool (LCPI) memory operand.
define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_add_const:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (%arg + 8) + 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled across the call and the
; result to be produced afterwards by one paddd with a constant-pool operand.
define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (%arg + C1) + C2.
; The checks expect a single paddd with a constant-pool operand; the combined
; lane values are not visible in the checks.
define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; (x+c1)-c2
; Scalar i32: (%arg + 8) - 2, intermediate has a single use.
; The checks expect one add/lea using the combined constant 6.
define i32 @add_const_sub_const(i32 %arg) {
; X86-LABEL: add_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $6, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 6(%rdi), %eax
; X64-NEXT: retq
%t0 = add i32 %arg, 8
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (%arg + 8) - 2, where the intermediate %t0 is also passed to @use.
; The checks expect %arg + 8 for the call and %arg + 6 (computed from the
; saved %arg) for the return value.
define i32 @add_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: add_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $6, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $6, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add i32 %arg, 8
call void @use(i32 %t0)
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (%arg + 8) - 2, intermediate has a single use.
; The checks expect a single paddd with a constant-pool (LCPI) memory operand.
define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (%arg + 8) - 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled across the call and the
; result to be produced afterwards by one paddd with a constant-pool operand.
define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (%arg + C1) - C2.
; The checks expect a single paddd with a constant-pool operand; the combined
; lane values are not visible in the checks.
define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; c2-(x+c1)
; Scalar i32: 2 - (%arg + 8), intermediate has a single use.
; The checks expect the constant -6 to be materialized and %arg subtracted
; from it.
define i32 @add_const_const_sub(i32 %arg) {
; X86-LABEL: add_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movl $-6, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: add_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movl $-6, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%t0 = add i32 %arg, 8
%t1 = sub i32 2, %t0
ret i32 %t1
}
; Scalar i32: 2 - (%arg + 8), where the intermediate %t0 is also passed to @use.
; The checks expect %arg + 8 for the call and -6 - %arg (computed from the
; saved %arg) for the return value.
define i32 @add_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: add_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $-6, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: add_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $-6, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add i32 %arg, 8
call void @use(i32 %t0)
%t1 = sub i32 2, %t0
ret i32 %t1
}
; <4 x i32> splat constants: 2 - (%arg + 8), intermediate has a single use.
; The checks expect splat(4294967290) - %arg via one psubd; 4294967290 is -6
; printed as an unsigned 32-bit value.
define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> splat constants: 2 - (%arg + 8), with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled across the call and the
; result computed afterwards as splat(4294967290) - %arg (4294967290 is -6 as
; an unsigned 32-bit value). The 64-bit checks fold the reload into psubd.
define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: C2 - (%arg + C1).
; The checks expect <4294967277,u,u,4294967290> - %arg via one psubd; those
; lanes are -19 (2 - 21) and -6 (2 - 8) printed as unsigned 32-bit values.
define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_add_const_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}
; (x-c1)+c2
; Scalar i32: (%arg - 8) + 2, intermediate has a single use.
; The checks expect one add/lea using the combined constant -6.
define i32 @sub_const_add_const(i32 %arg) {
; X86-LABEL: sub_const_add_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-6, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_const_add_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -6(%rdi), %eax
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
%t1 = add i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (%arg - 8) + 2, where the intermediate %t0 is also passed to @use.
; The checks expect %arg - 8 for the call and %arg + -6 (computed from the
; saved %arg) for the return value.
define i32 @sub_const_add_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $-6, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $-6, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
call void @use(i32 %t0)
%t1 = add i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (%arg - 8) + 2, intermediate has a single use.
; The checks expect a single paddd with a constant-pool (LCPI) memory operand.
define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_add_const:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (%arg - 8) + 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled, a psubd with a
; constant-pool operand for the call argument, and after the call one paddd
; with a constant-pool operand applied to the reloaded %arg.
define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (%arg - C1) + C2.
; The checks expect a single paddd with a constant-pool operand; the combined
; lane values are not visible in the checks.
define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; (x-c1)-c2
; Scalar i32: (%arg - 8) - 2, intermediate has a single use.
; The checks expect one add/lea using the combined constant -10.
define i32 @sub_const_sub_const(i32 %arg) {
; X86-LABEL: sub_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-10, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -10(%rdi), %eax
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (%arg - 8) - 2, where the intermediate %t0 is also passed to @use.
; The checks expect %arg - 8 for the call and %arg + -10 (computed from the
; saved %arg) for the return value.
define i32 @sub_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $-10, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $-10, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
call void @use(i32 %t0)
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (%arg - 8) - 2, intermediate has a single use.
; The checks expect a single psubd with a constant-pool (LCPI) memory operand.
define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_sub_const:
; X64: # %bb.0:
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (%arg - 8) - 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled, a psubd with a
; constant-pool operand for the call argument, and after the call one psubd
; with a constant-pool operand applied to the reloaded %arg.
define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (%arg - C1) - C2.
; The checks expect a single psubd with a constant-pool operand; the combined
; lane values are not visible in the checks.
define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; c2-(x-c1)
; Scalar i32: 2 - (%arg - 8), intermediate has a single use.
; The checks expect the constant 10 to be materialized and %arg subtracted
; from it.
define i32 @sub_const_const_sub(i32 %arg) {
; X86-LABEL: sub_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
%t1 = sub i32 2, %t0
ret i32 %t1
}
; Scalar i32: 2 - (%arg - 8), where the intermediate %t0 is also passed to @use.
; The checks expect %arg - 8 for the call and 10 - %arg (computed from the
; saved %arg) for the return value.
define i32 @sub_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: sub_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: sub_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 %arg, 8
call void @use(i32 %t0)
%t1 = sub i32 2, %t0
ret i32 %t1
}
; <4 x i32> splat constants: 2 - (%arg - 8), intermediate has a single use.
; The checks expect splat(10) - %arg via one psubd.
define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_const_sub:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> splat constants: 2 - (%arg - 8), with %t0 also passed to @vec_use.
; Unlike the single-use variant, the checks here keep the two steps separate:
; %t0 is computed with psubd, spilled across the call, and the result is
; splat(2) - %t0 using the reloaded (or folded-reload) %t0.
define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: C2 - (%arg - C1).
; The checks expect <23,u,u,10> - %arg via one psubd (23 = 2 + 21, 10 = 2 + 8;
; lanes with an undef input are printed as u).
define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_sub_const_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}
; (c1-x)+c2
; Scalar i32: (8 - %arg) + 2, intermediate has a single use.
; The checks expect the constant 10 to be materialized and %arg subtracted
; from it.
define i32 @const_sub_add_const(i32 %arg) {
; X86-LABEL: const_sub_add_const:
; X86: # %bb.0:
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: const_sub_add_const:
; X64: # %bb.0:
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%t0 = sub i32 8, %arg
%t1 = add i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (8 - %arg) + 2, where the intermediate %t0 is also passed to @use.
; The checks expect 8 - %arg for the call and 10 - %arg (computed from the
; saved %arg) for the return value.
define i32 @const_sub_add_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $8, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movl $8, %edi
; X64-NEXT: subl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 8, %arg
call void @use(i32 %t0)
%t1 = add i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (8 - %arg) + 2, intermediate has a single use.
; The checks expect splat(10) - %arg via one psubd.
define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_add_const:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (8 - %arg) + 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled, splat(8) - %arg for the
; call argument, and splat(10) - %arg after the call using the spilled %arg.
define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_add_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @vec_use(<4 x i32> %t0)
%t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (C1 - %arg) + C2.
; The checks expect <23,u,u,10> - %arg via one psubd (23 = 21 + 2, 10 = 8 + 2;
; lanes with an undef input are printed as u).
define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_add_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; (c1-x)-c2
; Scalar i32: (8 - %arg) - 2, intermediate has a single use.
; The checks expect the constant 6 to be materialized and %arg subtracted
; from it.
define i32 @const_sub_sub_const(i32 %arg) {
; X86-LABEL: const_sub_sub_const:
; X86: # %bb.0:
; X86-NEXT: movl $6, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: const_sub_sub_const:
; X64: # %bb.0:
; X64-NEXT: movl $6, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%t0 = sub i32 8, %arg
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; Scalar i32: (8 - %arg) - 2, where the intermediate %t0 is also passed to @use.
; The checks expect 8 - %arg for the call and 6 - %arg (computed from the
; saved %arg) for the return value.
define i32 @const_sub_sub_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $8, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $6, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movl $8, %edi
; X64-NEXT: subl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $6, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 8, %arg
call void @use(i32 %t0)
%t1 = sub i32 %t0, 2
ret i32 %t1
}
; <4 x i32> splat constants: (8 - %arg) - 2, intermediate has a single use.
; The checks expect splat(6) - %arg via one psubd.
define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_sub_const:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> splat constants: (8 - %arg) - 2, with %t0 also passed to @vec_use.
; The checks expect the original %arg to be spilled, splat(8) - %arg for the
; call argument, and splat(6) - %arg after the call using the spilled %arg.
define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_sub_const_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: (C1 - %arg) - C2.
; The checks expect <19,u,u,6> - %arg via one psubd (19 = 21 - 2, 6 = 8 - 2;
; lanes with an undef input are printed as u).
define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_nonsplat:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_sub_const_nonsplat:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
ret <4 x i32> %t1
}
; c2-(c1-x)
; Scalar i32: 2 - (8 - %arg), intermediate has a single use.
; The checks expect one add/lea of %arg with the combined constant -6.
define i32 @const_sub_const_sub(i32 %arg) {
; X86-LABEL: const_sub_const_sub:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-6, %eax
; X86-NEXT: retl
;
; X64-LABEL: const_sub_const_sub:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -6(%rdi), %eax
; X64-NEXT: retq
%t0 = sub i32 8, %arg
%t1 = sub i32 2, %t0
ret i32 %t1
}
; Scalar i32: 2 - (8 - %arg), where the intermediate %t0 is also passed to @use.
; Unlike the single-use variant, the checks here keep the two steps separate:
; %t0 = 8 - %arg is kept in a callee-saved register across the call and the
; result is computed as 2 - %t0.
define i32 @const_sub_const_sub_extrause(i32 %arg) {
; X86-LABEL: const_sub_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl $8, %esi
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $2, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: const_sub_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl $8, %ebx
; X64-NEXT: subl %edi, %ebx
; X64-NEXT: movl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $2, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub i32 8, %arg
call void @use(i32 %t0)
%t1 = sub i32 2, %t0
ret i32 %t1
}
; <4 x i32> splat constants: 2 - (8 - %arg), intermediate has a single use.
; The checks expect a single paddd with a constant-pool (LCPI) memory operand.
define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_const_sub:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> splat constants: 2 - (8 - %arg), with %t0 also passed to @vec_use.
; Unlike the single-use variant, the checks here keep the two steps separate:
; %t0 = splat(8) - %arg is spilled across the call and the result is
; splat(2) - %t0 using the reloaded (or folded-reload) %t0.
define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_extrause:
; X86: # %bb.0:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqu %xmm1, (%esp) # 16-byte Spill
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_const_sub_extrause:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
call void @vec_use(<4 x i32> %t0)
%t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
ret <4 x i32> %t1
}
; <4 x i32> with non-splat constants containing undef lanes: C2 - (C1 - %arg).
; The checks expect a single paddd with a constant-pool operand; the combined
; lane values are not visible in the checks.
define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_nonsplat:
; X86: # %bb.0:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vec_const_sub_const_sub_nonsplat:
; X64: # %bb.0:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}