
BreakFalseDeps picks the best register for an undef operand when an instruction carries a false dependency. The problem is that when the instruction is close to the beginning of the function, ReachingDefAnalysis is overly optimistic about which registers are unused, which can result in collisions with registers just defined in the caller. This patch selects the undef register in reverse order, which reduces the probability of register collisions between caller and callee. It brings improvement in some of our internal benchmarks with negligible effect on the others.
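The gist of the change, as a minimal sketch (hypothetical helper name and a simplified register-class interface, not the actual BreakFalseDeps.cpp code): walk the allocation order from the back instead of the front, since the front of the order holds the argument registers a caller is most likely to have just written.

// Hypothetical sketch of reverse-order undef-register selection; not the
// real LLVM implementation. "Candidates" stands for the register class's
// allocation order, "Clobbered" for what ReachingDefAnalysis can prove is
// unsafe to reuse at this point.
#include <optional>
#include <vector>

using Register = unsigned;

std::optional<Register>
pickUndefRegister(const std::vector<Register> &Candidates,
                  const std::vector<bool> &Clobbered) {
  // Registers early in the allocation order (e.g. xmm0, xmm1) typically
  // carry incoming arguments; near the top of a function the analysis has
  // not yet seen those defs, so prefer the back of the order instead.
  for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It)
    if (!Clobbered[*It])
      return *It;
  return std::nullopt; // nothing provably safe; leave the operand alone
}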
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_addpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_addps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_addsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <2 x double> %a0, i32 0
%3 = extractelement <2 x double> %a1, i32 0
%4 = fadd double %2, %3
%5 = insertelement <2 x double> %a0, double %4, i32 0
ret <2 x double> %5
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_addss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fadd float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <4 x float> %a0, i32 0
%3 = extractelement <4 x float> %a1, i32 0
%4 = fadd float %2, %3
%5 = insertelement <4 x float> %a0, float %4, i32 0
ret <4 x float> %5
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addsubpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_addsubpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsubpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addsubps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_addsubps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vaddsubps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_andnpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandnpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, <i64 -1, i64 -1>
%5 = and <2 x i64> %4, %3
%6 = bitcast <2 x i64> %5 to <2 x double>
; fadd forces execution domain
%7 = fadd <2 x double> %6, <double 0x0, double 0x0>
ret <2 x double> %7
}

define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_andnpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandnpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
%5 = and <4 x i64> %4, %3
%6 = bitcast <4 x i64> %5 to <4 x double>
; fadd forces execution domain
%7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_andnps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandnps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x float> %a0 to <2 x i64>
%3 = bitcast <4 x float> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, <i64 -1, i64 -1>
%5 = and <2 x i64> %4, %3
%6 = bitcast <2 x i64> %5 to <4 x float>
; fadd forces execution domain
%7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %7
}

define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_andnps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandnps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x float> %a0 to <4 x i64>
%3 = bitcast <8 x float> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
%5 = and <4 x i64> %4, %3
%6 = bitcast <4 x i64> %5 to <8 x float>
; fadd forces execution domain
%7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_andpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
%4 = and <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <2 x double>
; fadd forces execution domain
%6 = fadd <2 x double> %5, <double 0x0, double 0x0>
ret <2 x double> %6
}

define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_andpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
%4 = and <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <4 x double>
; fadd forces execution domain
%6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_andps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x float> %a0 to <2 x i64>
%3 = bitcast <4 x float> %a1 to <2 x i64>
%4 = and <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <4 x float>
; fadd forces execution domain
%6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %6
}

define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_andps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vandps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x float> %a0 to <4 x i64>
%3 = bitcast <8 x float> %a1 to <4 x i64>
%4 = and <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <8 x float>
; fadd forces execution domain
%6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_blendpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendpd $2, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[1]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
; fadd forces execution domain
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
ret <2 x double> %3
}

define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_blendpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendpd $6, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[1,2],ymm0[3]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
; fadd forces execution domain
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %3
}

define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_blendps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendps $102, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[1,2],ymm0[3,4],mem[5,6],ymm0[7]
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1
; fadd forces execution domain
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %3
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
; CHECK-LABEL: stack_fold_blendvpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendvpd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) {
; CHECK-LABEL: stack_fold_blendvpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendvpd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
; CHECK-LABEL: stack_fold_blendvps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendvps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) {
; CHECK-LABEL: stack_fold_blendvps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vblendvps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_cmppd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_cmppd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_cmpsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp oeq double %a0, %a1
%3 = zext i1 %2 to i32
ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_cmpsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_cmpss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp oeq float %a0, %a1
%3 = zext i1 %2 to i32
ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_comisd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: andb %al, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_comiss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: andb %al, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%3 = sitofp <2 x i32> %2 to <2 x double>
ret <2 x double> %3
}
define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
%cvt = sitofp <2 x i32> %2 to <2 x double>
ret <2 x double> %cvt
}

define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sitofp <4 x i32> %a0 to <4 x double>
ret <4 x double> %2
}

define <4 x double> @stack_fold_cvtdq2pd_ymm_int(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd_ymm_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%cvt = sitofp <4 x i32> %a0 to <4 x double>
ret <4 x double> %cvt
}

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sitofp <4 x i32> %a0 to <4 x float>
ret <4 x float> %2
}

define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2ps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sitofp <8 x i32> %a0 to <8 x float>
ret <8 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2dq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtpd2dqx {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2dq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtpd2dqy {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Folded Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtpd2psx {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptrunc <2 x double> %a0 to <2 x float>
ret <2 x float> %2
}

define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2ps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtpd2psy {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Folded Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptrunc <4 x double> %a0 to <4 x float>
ret <4 x float> %2
}

define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_cvtph2ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly

define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_cvtph2ps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2dq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2dq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2dq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%3 = fpext <2 x float> %2 to <2 x double>
ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
%cvtps2pd = fpext <2 x float> %2 to <2 x double>
ret <2 x double> %cvtps2pd
}

define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fpext <4 x float> %a0 to <4 x double>
ret <4 x double> %2
}

define <4 x double> @stack_fold_cvtps2pd_ymm_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd_ymm_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%cvtps2pd = fpext <4 x float> %a0 to <4 x double>
ret <4 x double> %cvtps2pd
}

define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2ph_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtps2ph $0, %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtsd2si_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtsd2si64_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) {
; CHECK-LABEL: stack_fold_cvtsi2sd:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i32 %a0 to double
ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
; CHECK-LABEL: stack_fold_cvtsi2sd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i32 %a0 to double
%3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
ret <2 x double> %3
}

define double @stack_fold_cvtsi642sd(i64 %a0) {
; CHECK-LABEL: stack_fold_cvtsi642sd:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i64 %a0 to double
ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
; CHECK-LABEL: stack_fold_cvtsi642sd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i64 %a0 to double
%3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
ret <2 x double> %3
}

define float @stack_fold_cvtsi2ss(i32 %a0) {
|
|
; CHECK-LABEL: stack_fold_cvtsi2ss:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: pushq %rbp
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: pushq %r15
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 24
|
|
; CHECK-NEXT: pushq %r14
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
; CHECK-NEXT: pushq %r13
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 40
|
|
; CHECK-NEXT: pushq %r12
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-NEXT: pushq %rbx
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 56
|
|
; CHECK-NEXT: .cfi_offset %rbx, -56
|
|
; CHECK-NEXT: .cfi_offset %r12, -48
|
|
; CHECK-NEXT: .cfi_offset %r13, -40
|
|
; CHECK-NEXT: .cfi_offset %r14, -32
|
|
; CHECK-NEXT: .cfi_offset %r15, -24
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
|
|
; CHECK-NEXT: popq %rbx
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 48
|
|
; CHECK-NEXT: popq %r12
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 40
|
|
; CHECK-NEXT: popq %r13
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
; CHECK-NEXT: popq %r14
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 24
|
|
; CHECK-NEXT: popq %r15
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: popq %rbp
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
%2 = sitofp i32 %a0 to float
|
|
ret float %2
|
|
}
|
|
|
|
define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
; CHECK-LABEL: stack_fold_cvtsi2ss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i32 %a0 to float
%3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
ret <4 x float> %3
}

define float @stack_fold_cvtsi642ss(i64 %a0) {
; CHECK-LABEL: stack_fold_cvtsi642ss:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i64 %a0 to float
ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
; CHECK-LABEL: stack_fold_cvtsi642ss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
%2 = sitofp i64 %a0 to float
%3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
ret <4 x float> %3
}

; TODO stack_fold_cvtss2si

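; A hand-written sketch (not autogenerated) of one shape the TODO above could
; take, reusing the @llvm.x86.sse.cvtss2si intrinsic already declared in this
; file; the function name is hypothetical, and CHECK lines are deliberately
; omitted because they would have to come from utils/update_llc_test_checks.py
; rather than be written by hand:
; define i32 @stack_fold_cvtss2si_scalar(float %a0) {
;   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
;   %2 = insertelement <4 x float> undef, float %a0, i32 0
;   %3 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %2)
;   ret i32 %3
; }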
define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtss2si_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtss2si64_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttpd2dq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttpd2dqx {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttpd2dq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttpd2dqy {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Folded Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi <4 x double> %a0 to <4 x i32>
ret <4 x i32> %2
}

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttps2dq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi <4 x float> %a0 to <4 x i32>
ret <4 x i32> %2
}

define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttps2dq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttps2dq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi <8 x float> %a0 to <8 x i32>
ret <8 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
; CHECK-LABEL: stack_fold_cvttsd2si:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi double %a0 to i32
ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttsd2si_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
; CHECK-LABEL: stack_fold_cvttsd2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi double %a0 to i64
ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttsd2si64_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
; CHECK-LABEL: stack_fold_cvttss2si:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi float %a0 to i32
ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttss2si_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
; CHECK-LABEL: stack_fold_cvttss2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fptosi float %a0 to i64
ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttss2si64_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_divpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_divpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_divps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_divps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_divsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_divsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <2 x double> %a0, i32 0
%3 = extractelement <2 x double> %a1, i32 0
%4 = fdiv double %2, %3
%5 = insertelement <2 x double> %a0, double %4, i32 0
ret <2 x double> %5
}

define float @stack_fold_divss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_divss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fdiv float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_divss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdivss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <4 x float> %a0, i32 0
%3 = extractelement <4 x float> %a1, i32 0
%4 = fdiv float %2, %3
%5 = insertelement <4 x float> %a0, float %4, i32 0
ret <4 x float> %5
}

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_dppd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdppd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_dpps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdpps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_dpps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vdpps $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_extractf128:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
ret <4 x float> %1
}

define i32 @stack_fold_extractps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_extractps:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vextractps $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; fadd forces execution domain
%1 = fadd <4 x float> %a0, %a1
%2 = extractelement <4 x float> %1, i32 1
%3 = bitcast float %2 to i32
%4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret i32 %3
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_haddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhaddpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_haddpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhaddpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_haddps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhaddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_haddps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhaddps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_hsubpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_hsubpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhsubpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_hsubps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_hsubps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vhsubps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_insertf128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %2
}

define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_insertps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vinsertps $17, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = zero,mem[0],xmm0[2,3]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_maxpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_maxpd_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}

define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_maxpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_maxpd_ymm_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}

define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_maxps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_maxps_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}

define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_maxps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_maxps_ymm_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}

define double @stack_fold_maxsd(double %a0, double %a1) #0 {
; CHECK-LABEL: stack_fold_maxsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ogt double %a0, %a1
%3 = select i1 %2, double %a0, double %a1
ret double %3
}

define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
; CHECK-LABEL: stack_fold_maxsd_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ogt double %a0, %a1
%3 = select i1 %2, double %a0, double %a1
ret double %3
}

define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_maxsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_maxss(float %a0, float %a1) #0 {
; CHECK-LABEL: stack_fold_maxss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ogt float %a0, %a1
%3 = select i1 %2, float %a0, float %a1
ret float %3
}

define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
; CHECK-LABEL: stack_fold_maxss_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ogt float %a0, %a1
%3 = select i1 %2, float %a0, float %a1
ret float %3
}

define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_maxss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmaxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_minpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_minpd_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}

define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_minpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_minpd_ymm_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}

define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_minps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_minps_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}

define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_minps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_minps_ymm_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}

define double @stack_fold_minsd(double %a0, double %a1) #0 {
; CHECK-LABEL: stack_fold_minsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp olt double %a0, %a1
%3 = select i1 %2, double %a0, double %a1
ret double %3
}

define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
; CHECK-LABEL: stack_fold_minsd_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp olt double %a0, %a1
%3 = select i1 %2, double %a0, double %a1
ret double %3
}

define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_minsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_minss(float %a0, float %a1) #0 {
; CHECK-LABEL: stack_fold_minss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp olt float %a0, %a1
%3 = select i1 %2, float %a0, float %a1
ret float %3
}

define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
; CHECK-LABEL: stack_fold_minss_commutable:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp olt float %a0, %a1
%3 = select i1 %2, float %a0, float %a1
ret float %3
}

define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_minss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vminss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_movddup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovddup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0,0]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %2
}

define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_movddup_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovddup {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0,0,2,2]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %2
}

; TODO stack_fold_movhpd (load / store)
; TODO stack_fold_movhps (load / store)

; TODO stack_fold_movlpd (load / store)
; TODO stack_fold_movlps (load / store)

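; A hand-written sketch (not autogenerated) of one shape the movhpd TODO above
; could take: inserting a spilled double into the upper lane is the IR pattern
; that lowers to a vmovhpd load. The function name and the expectation that the
; reload folds here are assumptions; CHECK lines would have to be generated by
; utils/update_llc_test_checks.py.
; define <2 x double> @stack_fold_movhpd(<2 x double> %a0, double %a1) {
;   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
;   %2 = insertelement <2 x double> %a0, double %a1, i32 1
;   ret <2 x double> %2
; }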
define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_movshdup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x float> %2
}

define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_movshdup_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x float> %2
}

define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
|
|
; CHECK-LABEL: stack_fold_movsldup:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vmovsldup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0,0,2,2]
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
|
ret <4 x float> %2
|
|
}
|
|
|
|
define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) {
|
|
; CHECK-LABEL: stack_fold_movsldup_ymm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vmovsldup {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
|
|
; CHECK-NEXT: # ymm0 = mem[0,0,2,2,4,4,6,6]
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
|
ret <8 x float> %2
|
|
}
|
|
|
|

define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_mulpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_mulpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_mulps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_mulps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_mulsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_mulsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_mulsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <2 x double> %a0, i32 0
%3 = extractelement <2 x double> %a1, i32 0
%4 = fmul double %2, %3
%5 = insertelement <2 x double> %a0, double %4, i32 0
ret <2 x double> %5
}

define float @stack_fold_mulss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_mulss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_mulss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <4 x float> %a0, i32 0
%3 = extractelement <4 x float> %a1, i32 0
%4 = fmul float %2, %3
%5 = insertelement <4 x float> %a0, float %4, i32 0
ret <4 x float> %5
}

define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_orpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vorpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
%4 = or <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <2 x double>
; fadd forces execution domain
%6 = fadd <2 x double> %5, <double 0x0, double 0x0>
ret <2 x double> %6
}

define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_orpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vorpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
%4 = or <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <4 x double>
; fadd forces execution domain
%6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %6
}

define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_orps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vorps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x float> %a0 to <2 x i64>
%3 = bitcast <4 x float> %a1 to <2 x i64>
%4 = or <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <4 x float>
; fadd forces execution domain
%6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %6
}

define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_orps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vorps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x float> %a0 to <4 x i64>
%3 = bitcast <8 x float> %a1 to <4 x i64>
%4 = or <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <8 x float>
; fadd forces execution domain
%6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %6
}

define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_perm2f128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vperm2f128 $33, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %2
}

define <2 x double> @stack_fold_permilpd(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_permilpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %2
}

define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_permilpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilpd $5, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[1,0,3,2]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x double> %2
}

define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_permilpdvar:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone

define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_permilpdvar_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

define <4 x float> @stack_fold_permilps(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_permilps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilps $27, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,2,1,0]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %2
}

define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_permilps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilps $27, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %2
}

define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_permilpsvar:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone

define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_permilpsvar_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermilps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone

; TODO stack_fold_rcpps

define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_rcpps_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vrcpps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rcpps_ymm

define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_rcpps_ymm_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vrcpps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone

; TODO stack_fold_rcpss
; TODO stack_fold_rcpss_int

define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_roundpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundpd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_roundpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundpd $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_roundps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_roundps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundps $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

define double @stack_fold_roundsd(double %a0) optsize {
; CHECK-LABEL: stack_fold_roundsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
ret double %2
}

define double @stack_fold_roundsd_minsize(double %a0) minsize {
; CHECK-LABEL: stack_fold_roundsd_minsize:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
; CHECK-LABEL: stack_fold_roundsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vroundsd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define float @stack_fold_roundss(float %a0) optsize {
; CHECK-LABEL: stack_fold_roundss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vroundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.floor.f32(float %a0)
ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
; CHECK-LABEL: stack_fold_roundss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vroundss $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

; TODO stack_fold_rsqrtps

define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_rsqrtps_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vrsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rsqrtps_ymm

define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_rsqrtps_ymm_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vrsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone

; TODO stack_fold_rsqrtss
; TODO stack_fold_rsqrtss_int

define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_shufpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vshufpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[1],mem[0]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
; fadd forces execution domain
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
ret <2 x double> %3
}

define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_shufpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vshufpd $5, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[1],mem[0],ymm0[3],mem[2]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_shufps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vshufps $200, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0,2],mem[0,3]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
ret <4 x float> %2
}

define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_shufps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vshufps $148, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0,1],mem[1,2],ymm0[4,5],mem[5,6]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14>
ret <8 x float> %2
}

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_sqrtpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsqrtpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
ret <2 x double> %2
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_sqrtpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsqrtpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0)
ret <4 x double> %2
}
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_sqrtps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
; CHECK-LABEL: stack_fold_sqrtps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)

define double @stack_fold_sqrtsd(double %a0) optsize {
; CHECK-LABEL: stack_fold_sqrtsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vsqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.sqrt.f64(double %a0)
ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

; TODO stack_fold_sqrtsd_int

define float @stack_fold_sqrtss(float %a0) optsize {
; CHECK-LABEL: stack_fold_sqrtss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.sqrt.f32(float %a0)
ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

; TODO stack_fold_sqrtss_int

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_subpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_subpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_subps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_subps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_subsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_subsd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <2 x double> %a0, i32 0
%3 = extractelement <2 x double> %a1, i32 0
%4 = fsub double %2, %3
%5 = insertelement <2 x double> %a0, double %4, i32 0
ret <2 x double> %5
}

define float @stack_fold_subss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_subss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_subss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement <4 x float> %a0, i32 0
%3 = extractelement <4 x float> %a1, i32 0
%4 = fsub float %2, %3
%5 = insertelement <4 x float> %a0, float %4, i32 0
ret <4 x float> %5
}

define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_testpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_testpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_testps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_testps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: setb %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_ucomisd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: sete %al
; CHECK-NEXT: leal -1(%rax,%rax), %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ueq double %a0, %a1
%3 = select i1 %2, i32 1, i32 -1
ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_ucomisd_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: andb %al, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_ucomiss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: sete %al
; CHECK-NEXT: leal -1(%rax,%rax), %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ueq float %a0, %a1
%3 = select i1 %2, i32 1, i32 -1
ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_ucomiss_int:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: andb %al, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpckhpd:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpckhpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = xmm0[1],mem[1]
|
|
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
|
|
; fadd forces execution domain
|
|
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
|
|
ret <2 x double> %3
|
|
}
|
|
|
|
define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpckhpd_ymm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpckhpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
|
|
; CHECK-NEXT: # ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
|
|
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
|
|
; fadd forces execution domain
|
|
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
|
|
ret <4 x double> %3
|
|
}
|
|
|
|
define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpckhps:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
|
|
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
|
|
; fadd forces execution domain
|
|
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
|
|
ret <4 x float> %3
|
|
}
|
|
|
|
define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpckhps_ymm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpckhps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
|
|
; CHECK-NEXT: # ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
|
|
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
|
|
; fadd forces execution domain
|
|
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
|
|
ret <8 x float> %3
|
|
}
|
|
|
|
define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpcklpd:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
|
|
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
|
|
; fadd forces execution domain
|
|
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
|
|
ret <2 x double> %3
|
|
}
|
|
|
|
define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpcklpd_ymm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
|
|
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
|
|
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
|
; fadd forces execution domain
|
|
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
|
|
ret <4 x double> %3
|
|
}
|
|
|
|
define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpcklps:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
|
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
|
; fadd forces execution domain
|
|
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
|
|
ret <4 x float> %3
|
|
}
|
|
|
|
define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
|
|
; CHECK-LABEL: stack_fold_unpcklps_ymm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: #APP
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: #NO_APP
|
|
; CHECK-NEXT: vunpcklps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
|
|
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
|
|
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; fadd forces execution domain
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %3
}
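
; The xor tests below express the logic op as an integer xor on bitcast
; operands; the bitcasts fold away, and the folded reload is still expected
; in the FP-domain vxorps/vxorpd form thanks to the trailing fadd.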

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_xorpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <2 x double>
; fadd forces execution domain
%6 = fadd <2 x double> %5, <double 0x0, double 0x0>
ret <2 x double> %6
}

define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: stack_fold_xorpd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorpd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <4 x double>
; fadd forces execution domain
%6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_xorps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x float> %a0 to <2 x i64>
%3 = bitcast <4 x float> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <4 x float>
; fadd forces execution domain
%6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %6
}

define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_xorps_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x float> %a0 to <4 x i64>
%3 = bitcast <8 x float> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <8 x float>
; fadd forces execution domain
%6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %6
}

attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }