[X86] getFauxShuffleMask - add ISD::ROTL/ROTR handling (#184417)
Very similar to the existing X86ISD::VROTLI/VROTRI handling. Prep work for #184002.
This commit is contained in:
parent
dc44bcafe0
commit
dc1e3e5dbf
@ -6918,6 +6918,37 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case ISD::ROTL:
|
||||
case ISD::ROTR: {
|
||||
APInt UndefElts;
|
||||
SmallVector<APInt, 32> EltBits;
|
||||
if (!getTargetConstantBitsFromNode(N.getOperand(1), NumBitsPerElt,
|
||||
UndefElts, EltBits,
|
||||
/*AllowWholeUndefs*/ true,
|
||||
/*AllowPartialUndefs*/ false))
|
||||
return false;
|
||||
|
||||
// We can only decode 'whole byte' bit rotates as shuffles.
|
||||
for (unsigned I = 0; I != NumElts; ++I)
|
||||
if (DemandedElts[I] && !UndefElts[I] &&
|
||||
(EltBits[I].urem(NumBitsPerElt) % 8) != 0)
|
||||
return false;
|
||||
|
||||
Ops.push_back(N.getOperand(0));
|
||||
for (unsigned I = 0; I != NumElts; ++I) {
|
||||
if (!DemandedElts[I] || UndefElts[I]) {
|
||||
Mask.append(NumBytesPerElt, SM_SentinelUndef);
|
||||
continue;
|
||||
}
|
||||
int Offset = EltBits[I].urem(NumBitsPerElt) / 8;
|
||||
Offset = (ISD::ROTL == Opcode ? NumBytesPerElt - Offset : Offset);
|
||||
int BaseIdx = I * NumBytesPerElt;
|
||||
for (int J = 0; J != (int)NumBytesPerElt; ++J) {
|
||||
Mask.push_back(BaseIdx + ((Offset + J) % NumBytesPerElt));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case X86ISD::VROTLI:
|
||||
case X86ISD::VROTRI: {
|
||||
// We can only decode 'whole byte' bit rotates as shuffles.
|
||||
|
||||
@ -250,17 +250,10 @@ define <16 x i8> @combine_vpperm_as_proti_v8i16(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
}
|
||||
|
||||
define <16 x i8> @combine_shuffle_prot_v2i64(<2 x i64> %a0) {
|
||||
; X86-LABEL: combine_shuffle_prot_v2i64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_shuffle_prot_v2i64:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: combine_shuffle_prot_v2i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,8,15,14,13,12,11,10,6,5,4,3,2,1,0,7]
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 56, i64 16>)
|
||||
%2 = bitcast <2 x i64> %1 to <16 x i8>
|
||||
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
||||
@ -280,17 +273,10 @@ define <16 x i8> @combine_shuffle_proti_v2i64(<2 x i64> %a0) {
|
||||
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
|
||||
|
||||
define <16 x i8> @combine_shuffle_prot_v4i32(<4 x i32> %a0) {
|
||||
; X86-LABEL: combine_shuffle_prot_v4i32:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_shuffle_prot_v4i32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: combine_shuffle_prot_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,15,14,13,9,8,11,10,6,5,4,7,3,2,1,0]
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 0, i32 8, i32 16, i32 24>)
|
||||
%2 = bitcast <4 x i32> %1 to <16 x i8>
|
||||
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user