[X86] canonicalizeShuffleWithOp - initial support for shuffle(cvt(x),cvt(y)) -> cvt(shuffle(x,y))
Initial support is just for UNPCKL(CVTPH2PS(X),CVTPH2PS(Y)) -> CVTPH2PS(UNPCKL(X,Y)). Making this more general for other shuffles/conversions will have to be done carefully, as we have to handle changes in src/dst element width, so I just handled the CVTPH2PS regression case. Fixes #83414.
This commit is contained in:
parent
2eb40aadda
commit
f30f7a084c
@ -41500,6 +41500,21 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
|
||||
ShuffleVT,
|
||||
DAG.getNode(SrcOpcode, DL, OpVT, DAG.getBitcast(OpVT, Res)));
|
||||
}
|
||||
// TODO: We can generalize this for other shuffles/conversions.
|
||||
if (Opc == X86ISD::UNPCKL && SrcOpcode == X86ISD::CVTPH2PS &&
|
||||
N1.getOpcode() == SrcOpcode &&
|
||||
N0.getValueType() == N1.getValueType() &&
|
||||
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
|
||||
ShuffleVT.getScalarSizeInBits() == N0.getScalarValueSizeInBits() &&
|
||||
IsSafeToMoveShuffle(N0, SrcOpcode) &&
|
||||
IsSafeToMoveShuffle(N1, SrcOpcode)) {
|
||||
EVT OpSrcVT = N0.getOperand(0).getValueType();
|
||||
EVT OpDstVT = N0.getValueType();
|
||||
SDValue Res =
|
||||
DAG.getNode(Opc, DL, OpSrcVT, N0.getOperand(0), N1.getOperand(0));
|
||||
return DAG.getBitcast(ShuffleVT,
|
||||
DAG.getNode(SrcOpcode, DL, OpDstVT, Res));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -4966,22 +4966,18 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; F16C-LABEL: fptosi_2f16_to_4i32:
|
||||
; F16C: # %bb.0:
|
||||
; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; F16C-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: fptosi_2f16_to_4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512-NEXT: retq
|
||||
@ -5084,11 +5080,9 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; F16C-LABEL: fptoui_2f16_to_4i32:
|
||||
; F16C: # %bb.0:
|
||||
; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
|
||||
; F16C-NEXT: vpsrad $31, %xmm1, %xmm2
|
||||
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
@ -5100,11 +5094,9 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_2f16_to_4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
@ -5112,11 +5104,9 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
|
||||
;
|
||||
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
|
||||
; AVX512-FASTLANE: # %bb.0:
|
||||
; AVX512-FASTLANE-NEXT: vpsrld $16, %xmm0, %xmm1
|
||||
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512-FASTLANE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-FASTLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
|
||||
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512-FASTLANE-NEXT: retq
|
||||
|
Loading…
x
Reference in New Issue
Block a user