From 5dd160058f68d4df22ead31d8e1fab657e5bb8fc Mon Sep 17 00:00:00 2001 From: Jaydeep Chauhan Date: Thu, 26 Feb 2026 00:35:11 -0800 Subject: [PATCH] [X86] Handle VPMADD52L for smaller min-legal-vector-width (#183250) There is a crash, shown at https://godbolt.org/z/qdE1EE4Y9, after https://github.com/llvm/llvm-project/pull/171760: ``` ReplaceNodeResults: t32: v8i64 = X86ISD::VPMADD52L t10, t22, t2 Do not know how to custom type legalize this operation! ``` With ```"min-legal-vector-width"="512"``` it works fine, but with a smaller value it crashes. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++++ llvm/test/CodeGen/X86/combine-vpmadd52.ll | 37 +++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0620d242c0dc..60d2bcbac5da 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34277,6 +34277,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } + case X86ISD::VPMADD52L: { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi, Op2Lo, Op2Hi; + std::tie(Op0Lo, Op0Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Op1Lo, Op1Hi) = DAG.SplitVectorOperand(N, 1); + std::tie(Op2Lo, Op2Hi) = DAG.SplitVectorOperand(N, 2); + + EVT HalfVT = Op0Lo.getValueType(); + SDValue ResLo = + DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Lo, Op1Lo, Op2Lo); + SDValue ResHi = + DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Hi, Op1Hi, Op2Hi); + + Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ResLo, ResHi)); + return; + } case X86ISD::STRICT_CVTPH2PS: { EVT VT = N->getValueType(0); SDValue Lo, Hi; diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll index 8b741e9ef948..b6dfef064de2 100644 --- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll +++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll @@ -455,3 +455,40 @@ define <2 x
i64> @test_vpmadd52h_mul_one(<2 x i64> %x0, <2 x i64> %x1) { %ifma = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat(i64 1), <2 x i64> %x1) ret <2 x i64> %ifma } + +define <8 x i64> @test_vpmadd52luq_small_vector_width(<8 x i64> %a, <8 x i64> %b) #0 { +; AVX512-LABEL: test_vpmadd52luq_small_vector_width: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591] +; AVX512-NEXT: vpand %ymm4, %ymm1, %ymm5 +; AVX512-NEXT: vpand %ymm4, %ymm0, %ymm4 +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287] +; AVX512-NEXT: vpand %ymm0, %ymm3, %ymm3 +; AVX512-NEXT: vpand %ymm0, %ymm2, %ymm2 +; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vpmadd52luq %ymm2, %ymm4, %ymm0 +; AVX512-NEXT: vpmadd52luq %ymm3, %ymm5, %ymm1 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52luq_small_vector_width: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591] +; AVX-NEXT: vpand %ymm4, %ymm1, %ymm5 +; AVX-NEXT: vpand %ymm4, %ymm0, %ymm4 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287] +; AVX-NEXT: vpand %ymm0, %ymm3, %ymm3 +; AVX-NEXT: vpand %ymm0, %ymm2, %ymm2 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm4, %ymm0 +; AVX-NEXT: {vex} vpmadd52luq %ymm3, %ymm5, %ymm1 +; AVX-NEXT: retq + %a_masked = and <8 x i64> %a, splat (i64 8589934591) + %b_masked = and <8 x i64> %b, splat (i64 524287) + + %res = mul <8 x i64> %a_masked, %b_masked + ret <8 x i64> %res +} + +attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="tigerlake" }