[X86] LowerVSELECT - split v16i16/v32i8 pre-AVX2 VSELECT ops if enough of the operands are free to split.
Often on AVX1 we're better off consistently using 128-bit instructions, so recognise when the operands are loads that can be freely/cheaply split — ideally this functionality needs to be moved to isFreeToSplitVector, but we're using it in a few places where we don't want to split loads yet. Based on a regression reported after #92794.
This commit is contained in:
parent
654cd94629
commit
b52962d1b8
@@ -17846,6 +17846,22 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
   }
 
+  // v16i16/v32i8 selects without AVX2, if the condition and another operand
+  // are free to split, then better to split before expanding the
+  // select. Don't bother with XOP as it has the fast VPCMOV instruction.
+  // TODO: This is very similar to narrowVectorSelect.
+  // TODO: Add Load splitting to isFreeToSplitVector ?
+  if (EltSize < 32 && VT.is256BitVector() && !Subtarget.hasAVX2() &&
+      !Subtarget.hasXOP()) {
+    bool FreeCond = isFreeToSplitVector(Cond.getNode(), DAG);
+    bool FreeLHS = isFreeToSplitVector(LHS.getNode(), DAG) ||
+                   (ISD::isNormalLoad(LHS.getNode()) && LHS.hasOneUse());
+    bool FreeRHS = isFreeToSplitVector(RHS.getNode(), DAG) ||
+                   (ISD::isNormalLoad(RHS.getNode()) && RHS.hasOneUse());
+    if (FreeCond && (FreeLHS || FreeRHS))
+      return splitVectorOp(Op, DAG, dl);
+  }
+
   // Only some types will be legal on some subtargets. If we can emit a legal
   // VSELECT-matching blend, return Op, and but if we need to expand, return
   // a null value.
@@ -1509,16 +1509,16 @@ define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) {
 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8]
-; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandnps (%rsi), %ymm0, %ymm1
-; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps %ymm0, (%rdx)
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa (%rsi), %xmm4
+; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
+; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
+; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: store_blend_load_v16i16:
@@ -1578,16 +1578,16 @@ define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) {
 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandnps (%rsi), %ymm0, %ymm1
-; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps %ymm0, (%rdx)
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa (%rsi), %xmm4
+; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
+; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
+; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: store_blend_load_v32i8: