[X86] getFauxShuffleMask - add ISD::ROTL/ROTR handling (#184417)

Decode whole-byte constant ISD::ROTL/ROTR rotates as byte shuffles, very similar to the existing X86ISD::VROTLI/VROTRI handling.

Prep work for #184002
This commit is contained in:
Simon Pilgrim 2026-03-03 20:52:38 +00:00 committed by GitHub
parent dc44bcafe0
commit dc1e3e5dbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 39 additions and 22 deletions

View File

@ -6918,6 +6918,37 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
return true;
}
// Decode a rotate whose per-element amounts are constant as a per-element
// byte permutation of operand 0. Returns false (before touching Ops or Mask)
// if the amounts are not constants or if any demanded, non-undef element is
// rotated by something other than a whole number of bytes.
case ISD::ROTL:
case ISD::ROTR: {
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
// Extract the constant bits of operand 1 (used below as the rotate amounts),
// one NumBitsPerElt-wide value per element. Elements that are entirely undef
// are accepted and flagged in UndefElts; partially undef elements are not.
if (!getTargetConstantBitsFromNode(N.getOperand(1), NumBitsPerElt,
UndefElts, EltBits,
/*AllowWholeUndefs*/ true,
/*AllowPartialUndefs*/ false))
return false;
// We can only decode 'whole byte' bit rotates as shuffles.
// The amount is reduced modulo the element width first, so only the
// effective rotation has to be a multiple of 8 bits. Elements that are not
// demanded or are undef are ignored here; they become undef mask entries.
for (unsigned I = 0; I != NumElts; ++I)
if (DemandedElts[I] && !UndefElts[I] &&
(EltBits[I].urem(NumBitsPerElt) % 8) != 0)
return false;
// Validation passed: the shuffle has a single input, the rotated value.
Ops.push_back(N.getOperand(0));
// Emit NumBytesPerElt mask entries for each element.
for (unsigned I = 0; I != NumElts; ++I) {
if (!DemandedElts[I] || UndefElts[I]) {
Mask.append(NumBytesPerElt, SM_SentinelUndef);
continue;
}
// Effective rotate amount expressed in bytes.
int Offset = EltBits[I].urem(NumBitsPerElt) / 8;
// A left rotate by Offset bytes is a right rotate by
// (NumBytesPerElt - Offset) bytes. A zero left rotate yields NumBytesPerElt
// here, which the modulo below wraps back to the identity mapping.
// NOTE(review): presumably Opcode is N's opcode, set earlier in the
// function outside this hunk - confirm.
Offset = (ISD::ROTL == Opcode ? NumBytesPerElt - Offset : Offset);
// Index of this element's first byte within the byte-level mask.
int BaseIdx = I * NumBytesPerElt;
// Result byte J of the element is taken from source byte
// (Offset + J) mod NumBytesPerElt of the same element.
for (int J = 0; J != (int)NumBytesPerElt; ++J) {
Mask.push_back(BaseIdx + ((Offset + J) % NumBytesPerElt));
}
}
return true;
}
case X86ISD::VROTLI:
case X86ISD::VROTRI: {
// We can only decode 'whole byte' bit rotates as shuffles.

View File

@ -250,17 +250,10 @@ define <16 x i8> @combine_vpperm_as_proti_v8i16(<16 x i8> %a0, <16 x i8> %a1) {
}
define <16 x i8> @combine_shuffle_prot_v2i64(<2 x i64> %a0) {
; X86-LABEL: combine_shuffle_prot_v2i64:
; X86: # %bb.0:
; X86-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: retl
;
; X64-LABEL: combine_shuffle_prot_v2i64:
; X64: # %bb.0:
; X64-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: retq
; CHECK-LABEL: combine_shuffle_prot_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,8,15,14,13,12,11,10,6,5,4,3,2,1,0,7]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 56, i64 16>)
%2 = bitcast <2 x i64> %1 to <16 x i8>
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@ -280,17 +273,10 @@ define <16 x i8> @combine_shuffle_proti_v2i64(<2 x i64> %a0) {
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
define <16 x i8> @combine_shuffle_prot_v4i32(<4 x i32> %a0) {
; X86-LABEL: combine_shuffle_prot_v4i32:
; X86: # %bb.0:
; X86-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: retl
;
; X64-LABEL: combine_shuffle_prot_v4i32:
; X64: # %bb.0:
; X64-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: retq
; CHECK-LABEL: combine_shuffle_prot_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,15,14,13,9,8,11,10,6,5,4,7,3,2,1,0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 0, i32 8, i32 16, i32 24>)
%2 = bitcast <4 x i32> %1 to <16 x i8>
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>