[X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504)

Unlike CVTTP2SI, CVTTP2UI is only available on AVX512 targets, so there is
no AVX1 variant to fall back to when we split a 512-bit vector; we can only
use the 128/256-bit variants if we have AVX512VL.

Fixes #154492

(cherry picked from commit d770567a514716cdb250a2dee635435c22622e34)
This commit is contained in:
Simon Pilgrim 2025-08-20 12:18:10 +01:00 committed by Tobias Hieta
parent 65b0685821
commit 5d71f7c2ac
No known key found for this signature in database
2 changed files with 25 additions and 1 deletions

View File

@ -44178,8 +44178,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
// Conversions.
// TODO: Add more CVT opcodes when we have test coverage.
case X86ISD::CVTTP2SI:
case X86ISD::CVTTP2UI: {
if (!Subtarget.hasVLX())
break;
[[fallthrough]];
}
case X86ISD::CVTTP2SI: {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f16 &&
!Subtarget.hasVLX())
break;

View File

@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
; Regression test for issue #154492.
; The masked 512-bit cvttps2udq intrinsic is called with mask i16 255 and a
; zero passthrough, so only the low 8 of the 16 result lanes come from the
; conversion. The checks below expect the conversion to stay at 512 bits
; (zmm) with only +avx512f, and to be narrowed to 256 bits (ymm) with
; +avx512vl.
define <16 x i32> @PR154492() {
; AVX512F-LABEL: PR154492:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: PR154492:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> zeroinitializer, <16 x i32> zeroinitializer, i16 255, i32 4)
ret <16 x i32> %res
}