From 4fa3b50fc38fb5665c40aea11c91e8e020be221e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 6 Sep 2016 06:56:59 +0000 Subject: [PATCH] [AVX-512] Fix masked VPERMI2PS isel when the index comes from a bitcast. We need to bitcast the index operand to a floating-point type so that it matches the result type. If not, then the passthru part of the DAG will be a bitcast from the index's original type to the destination type. This makes the masked pattern very difficult to match. The other option would be to add 5 sets of patterns for every other possible type. llvm-svn: 280696 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +- llvm/lib/Target/X86/X86InstrAVX512.td | 81 +++++++------------- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 8 +- llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 5 +- 4 files changed, 41 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b44b18118e7f..3548b9e1c824 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17869,19 +17869,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget } case VPERM_3OP_MASKZ: case VPERM_3OP_MASK:{ + MVT VT = Op.getSimpleValueType(); // Src2 is the PassThru SDValue Src1 = Op.getOperand(1); - SDValue Src2 = Op.getOperand(2); + // PassThru needs to be the same type as the destination in order + // to pattern match correctly. 
+ SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2)); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element if (IntrData->Type == VPERM_3OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else - PassThru = DAG.getBitcast(VT, Src2); + PassThru = Src2; // Swap Src1 and Src2 in the node creation return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 59218bcf552f..469acea99496 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -299,22 +299,6 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, (vselect _.KRCWM:$mask, RHS, _.RC:$src1), vselect, "", NoItinerary, IsCommutable, IsKCommutable>; -// Similar to AVX512_maskable_3src but in this case the input VT for the tied -// operand differs from the output VT. This requires a bitconvert on -// the preserved vector going into the vselect. 
-multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, - X86VectorVTInfo InVT, - dag Outs, dag NonTiedIns, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS> : - AVX512_maskable_common; - multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, @@ -1181,83 +1165,76 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- -multiclass avx512_perm_i opc, string OpcodeStr, - X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +multiclass avx512_perm_i opc, string OpcodeStr, X86VectorVTInfo _> { let Constraints = "$src1 = $dst" in { - defm rr: AVX512_maskable_3src_cast, EVEX_4V, + (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, AVX5128IBase; - defm rm: AVX512_maskable_3src_cast, EVEX_4V, AVX5128IBase; } } multiclass avx512_perm_i_mb opc, string OpcodeStr, - X86VectorVTInfo _, X86VectorVTInfo IdxVT> { + X86VectorVTInfo _> { let Constraints = "$src1 = $dst" in - defm rmb: AVX512_maskable_3src_cast, AVX5128IBase, EVEX_4V, EVEX_B; } multiclass avx512_perm_i_sizes opc, string OpcodeStr, - AVX512VLVectorVTInfo VTInfo, - AVX512VLVectorVTInfo ShuffleMask> { - defm NAME: avx512_perm_i, - avx512_perm_i_mb, EVEX_V512; + AVX512VLVectorVTInfo VTInfo> { + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_i, - avx512_perm_i_mb, EVEX_V128; - defm NAME#256: avx512_perm_i, - avx512_perm_i_mb, EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } multiclass avx512_perm_i_sizes_bw opc, string OpcodeStr, AVX512VLVectorVTInfo VTInfo, - AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_i, EVEX_V512; + defm NAME: 
avx512_perm_i, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_i, EVEX_V128; - defm NAME#256: avx512_perm_i, EVEX_V256; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; } } defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", - avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", - avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", - avx512vl_i16_info, avx512vl_i16_info, HasBWI>, + avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", - avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, + avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", - avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; + avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", - avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; // VPERMT2 multiclass avx512_perm_t opc, string OpcodeStr, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index d2a190530ee0..08b7e022841a 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -414,10 +414,12 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTCisSameSizeAs<0,2>, SDTCisSameAs<0,3>]>, []>; +// Even though the index operand should be integer, we need to make it match the +// destination type so that we can pattern match the masked version where the +// index is also the passthru operand. 
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, - SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, - SDTCisSameSizeAs<0,1>, + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>, []>; diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 2d7dca8025e4..e9d106ba642c 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2030,9 +2030,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> % ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] -; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda] -; CHECK-NEXT: vblendmps %xmm3, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc3] +; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] +; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %x1cast = bitcast <2 x i64> %x1 to <4 x i32> %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)