[DAG] SimplifyDemandedVectorElts - add handling for INT<->FP conversions (#117884)
This commit is contained in:
parent
154c7c0bf2
commit
b1a48af56a
@ -3736,6 +3736,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
|
||||
KnownUndef.clearAllBits();
|
||||
}
|
||||
break;
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
|
||||
KnownZero, TLO, Depth + 1))
|
||||
return true;
|
||||
// Don't fall through to generic undef -> undef handling.
|
||||
return false;
|
||||
default: {
|
||||
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
|
||||
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
|
||||
|
@ -11,9 +11,9 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
|
||||
define void @draw_llvm_vs_variant0(<4 x float> %x) {
|
||||
; CHECK-LABEL: draw_llvm_vs_variant0:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxsd v3, 0(r3)
|
||||
; CHECK-NEXT: vmrghh v3, v3, v3
|
||||
; CHECK-NEXT: lxsihzx v3, 0, r3
|
||||
; CHECK-NEXT: vextsh2w v3, v3
|
||||
; CHECK-NEXT: xxmrghw v3, v3, v3
|
||||
; CHECK-NEXT: xvcvsxwsp vs0, v3
|
||||
; CHECK-NEXT: xxspltw vs0, vs0, 2
|
||||
; CHECK-NEXT: xvmaddasp vs0, v2, v2
|
||||
|
@ -85,8 +85,7 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; X86-NEXT: vcvttpd2qq %xmm1, %xmm1
|
||||
; X86-NEXT: vcvttpd2qq %xmm0, %xmm1
|
||||
; X86-NEXT: vmovd %xmm1, %esi
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
|
@ -141,56 +141,61 @@ declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
|
||||
define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
|
||||
; CHECK-SSE-LABEL: fmul_pow2_8xhalf:
|
||||
; CHECK-SSE: # %bb.0:
|
||||
; CHECK-SSE-NEXT: subq $88, %rsp
|
||||
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 96
|
||||
; CHECK-SSE-NEXT: subq $104, %rsp
|
||||
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 112
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; CHECK-SSE-NEXT: pslld $23, %xmm1
|
||||
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
|
||||
; CHECK-SSE-NEXT: paddd %xmm2, %xmm1
|
||||
; CHECK-SSE-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; CHECK-SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: pslld $16, %xmm1
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; CHECK-SSE-NEXT: pslld $23, %xmm0
|
||||
; CHECK-SSE-NEXT: paddd %xmm2, %xmm0
|
||||
; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: pslld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: psrld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
|
||||
; CHECK-SSE-NEXT: pslld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: psrld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: psrlq $48, %xmm0
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3]
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: psrld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
|
||||
; CHECK-SSE-NEXT: psrlq $48, %xmm0
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: cvtdq2ps (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3]
|
||||
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd $255, (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3]
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
@ -202,24 +207,8 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
@ -234,26 +223,41 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
|
||||
; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
||||
; CHECK-SSE-NEXT: punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-SSE-NEXT: addq $88, %rsp
|
||||
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
|
||||
; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
|
||||
; CHECK-SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
|
||||
; CHECK-SSE-NEXT: addq $104, %rsp
|
||||
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-SSE-NEXT: retq
|
||||
;
|
||||
@ -1028,17 +1032,17 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
|
||||
; CHECK-SSE-NEXT: pslld $23, %xmm0
|
||||
; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,2,u,u,u,u,u,u]
|
||||
; CHECK-SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
|
||||
; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,2,u,u,u,u,u,u]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-SSE-NEXT: psrld $16, %xmm0
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = mem[1,1,1,1]
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
@ -1049,8 +1053,9 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
|
||||
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
|
||||
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
|
||||
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
|
||||
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
||||
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-SSE-NEXT: addq $40, %rsp
|
||||
; CHECK-SSE-NEXT: retq
|
||||
;
|
||||
|
@ -731,7 +731,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: stest_f16i32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovsqd %ymm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -894,7 +894,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: utesth_f16i32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovusqd %ymm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -1031,7 +1031,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: ustest_f16i32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
|
||||
@ -3343,7 +3343,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: stest_f16i32_mm:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovsqd %ymm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -3504,7 +3504,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: utesth_f16i32_mm:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovusqd %ymm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
@ -3640,7 +3640,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: ustest_f16i32_mm:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
|
||||
|
@ -630,13 +630,8 @@ define void @pr59677(i32 %x, ptr %out) nounwind {
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: vmovd %eax, %xmm0
|
||||
; X86-NEXT: orl $1, %eax
|
||||
; X86-NEXT: vmovd %eax, %xmm1
|
||||
; X86-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; X86-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm0, %xmm0
|
||||
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
@ -651,10 +646,6 @@ define void @pr59677(i32 %x, ptr %out) nounwind {
|
||||
; X64-NEXT: pushq %rbx
|
||||
; X64-NEXT: movq %rsi, %rbx
|
||||
; X64-NEXT: vmovd %edi, %xmm0
|
||||
; X64-NEXT: orl $1, %edi
|
||||
; X64-NEXT: vmovd %edi, %xmm1
|
||||
; X64-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; X64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm0, %xmm0
|
||||
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; X64-NEXT: callq sinf@PLT
|
||||
|
@ -4966,8 +4966,6 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; F16C-LABEL: fptosi_2f16_to_4i32:
|
||||
; F16C: # %bb.0:
|
||||
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
@ -4975,8 +4973,6 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: fptosi_2f16_to_4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
@ -5104,8 +5100,6 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
|
||||
; AVX512-FASTLANE: # %bb.0:
|
||||
; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
|
||||
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
@ -5212,7 +5206,7 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_4f16_to_4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -10,7 +10,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind {
|
||||
; X86-SSE2-LABEL: convert_v2i16_to_v2f32:
|
||||
; X86-SSE2: # %bb.0: # %entry
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
|
||||
; X86-SSE2-NEXT: psrad $16, %xmm0
|
||||
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
|
||||
@ -26,7 +26,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind {
|
||||
;
|
||||
; X64-SSE2-LABEL: convert_v2i16_to_v2f32:
|
||||
; X64-SSE2: # %bb.0: # %entry
|
||||
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
|
||||
; X64-SSE2-NEXT: psrad $16, %xmm0
|
||||
; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; X64-SSE2-NEXT: movlps %xmm0, (%rdi)
|
||||
|
Loading…
x
Reference in New Issue
Block a user