Maryam Moghadas 934d5fa2b8 [PowerPC] Exploit xxperm, check for dead vectors and substitute vperm with xxperm
The vperm instruction requires its data to be in the Altivec registers. If one
of the vector operands is not used after the vperm instruction, the vperm can
be substituted by xxperm, which doubles the number of available registers.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D133700
2022-11-23 13:28:12 -06:00

406 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
; Builds a <4 x i32> from the sign-extended bytes 0, 4, 8 and 12 of %a.
; Little-endian expectation: a single vextsb2w.
; Big-endian expectation: a vector is loaded from .LCPI0_0 through the TOC and
; fed to xxperm to rearrange %a before the vextsb2w.
define <4 x i32> @vextsb2wLE(<16 x i8> %a) {
; CHECK-LE-LABEL: vextsb2wLE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vextsb2w 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsb2wLE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-BE-NEXT: lxv 0, 0(3)
; CHECK-BE-NEXT: xxperm 34, 34, 0
; CHECK-BE-NEXT: vextsb2w 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i32 lane; source byte index = 4 * lane.
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i32
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
%vecext1 = extractelement <16 x i8> %a, i32 4
%conv2 = sext i8 %vecext1 to i32
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
%vecext4 = extractelement <16 x i8> %a, i32 8
%conv5 = sext i8 %vecext4 to i32
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
%vecext7 = extractelement <16 x i8> %a, i32 12
%conv8 = sext i8 %vecext7 to i32
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
ret <4 x i32> %vecinit9
}
; Builds a <2 x i64> from the sign-extended bytes 0 and 8 of %a.
; Little-endian expectation: a single vextsb2d.
; Big-endian expectation: a vector is loaded from .LCPI1_0 through the TOC and
; fed to xxperm to rearrange %a before the vextsb2d.
define <2 x i64> @vextsb2dLE(<16 x i8> %a) {
; CHECK-LE-LABEL: vextsb2dLE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vextsb2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsb2dLE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-BE-NEXT: addi 3, 3, .LCPI1_0@toc@l
; CHECK-BE-NEXT: lxv 0, 0(3)
; CHECK-BE-NEXT: xxperm 34, 34, 0
; CHECK-BE-NEXT: vextsb2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source byte index = 8 * lane.
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <16 x i8> %a, i32 8
%conv2 = sext i8 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <4 x i32> from the sign-extended halfwords 0, 2, 4 and 6 of %a.
; Little-endian expectation: a single vextsh2w.
; Big-endian expectation: a vector is loaded from .LCPI2_0 through the TOC and
; fed to xxperm to rearrange %a before the vextsh2w.
define <4 x i32> @vextsh2wLE(<8 x i16> %a) {
; CHECK-LE-LABEL: vextsh2wLE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vextsh2w 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsh2wLE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-BE-NEXT: addi 3, 3, .LCPI2_0@toc@l
; CHECK-BE-NEXT: lxv 0, 0(3)
; CHECK-BE-NEXT: xxperm 34, 34, 0
; CHECK-BE-NEXT: vextsh2w 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i32 lane; source halfword index = 2 * lane.
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i32
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
%vecext1 = extractelement <8 x i16> %a, i32 2
%conv2 = sext i16 %vecext1 to i32
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
%vecext4 = extractelement <8 x i16> %a, i32 4
%conv5 = sext i16 %vecext4 to i32
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
%vecext7 = extractelement <8 x i16> %a, i32 6
%conv8 = sext i16 %vecext7 to i32
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
ret <4 x i32> %vecinit9
}
; Builds a <2 x i64> from the sign-extended halfwords 0 and 4 of %a.
; Little-endian expectation: a single vextsh2d.
; Big-endian expectation: a vector is loaded from .LCPI3_0 through the TOC and
; fed to xxperm to rearrange %a before the vextsh2d.
define <2 x i64> @vextsh2dLE(<8 x i16> %a) {
; CHECK-LE-LABEL: vextsh2dLE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vextsh2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsh2dLE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-BE-NEXT: addi 3, 3, .LCPI3_0@toc@l
; CHECK-BE-NEXT: lxv 0, 0(3)
; CHECK-BE-NEXT: xxperm 34, 34, 0
; CHECK-BE-NEXT: vextsh2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source halfword index = 4 * lane.
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <8 x i16> %a, i32 4
%conv2 = sext i16 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <2 x i64> from the sign-extended words 0 and 2 of %a.
; Little-endian expectation: a single vextsw2d.
; Big-endian expectation: a vmrgew of %a with itself precedes the vextsw2d; no
; TOC load is expected for this case.
define <2 x i64> @vextsw2dLE(<4 x i32> %a) {
; CHECK-LE-LABEL: vextsw2dLE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vextsw2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsw2dLE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vmrgew 2, 2, 2
; CHECK-BE-NEXT: vextsw2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source word index = 2 * lane.
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sext i32 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x i32> %a, i32 2
%conv2 = sext i32 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <4 x i32> from the sign-extended bytes 3, 7, 11 and 15 of %a.
; Big-endian expectation: a single vextsb2w.
; Little-endian expectation: a vsldoi of %a with itself (shift amount 13)
; precedes the vextsb2w.
define <4 x i32> @vextsb2wBE(<16 x i8> %a) {
; CHECK-LE-LABEL: vextsb2wBE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vsldoi 2, 2, 2, 13
; CHECK-LE-NEXT: vextsb2w 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsb2wBE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vextsb2w 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i32 lane; source byte index = 4 * lane + 3.
%vecext = extractelement <16 x i8> %a, i32 3
%conv = sext i8 %vecext to i32
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
%vecext1 = extractelement <16 x i8> %a, i32 7
%conv2 = sext i8 %vecext1 to i32
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
%vecext4 = extractelement <16 x i8> %a, i32 11
%conv5 = sext i8 %vecext4 to i32
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
%vecext7 = extractelement <16 x i8> %a, i32 15
%conv8 = sext i8 %vecext7 to i32
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
ret <4 x i32> %vecinit9
}
; Builds a <2 x i64> from the sign-extended bytes 7 and 15 of %a.
; Big-endian expectation: a single vextsb2d.
; Little-endian expectation: a vsldoi of %a with itself (shift amount 9)
; precedes the vextsb2d.
define <2 x i64> @vextsb2dBE(<16 x i8> %a) {
; CHECK-LE-LABEL: vextsb2dBE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vsldoi 2, 2, 2, 9
; CHECK-LE-NEXT: vextsb2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsb2dBE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vextsb2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source byte index = 8 * lane + 7.
%vecext = extractelement <16 x i8> %a, i32 7
%conv = sext i8 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <16 x i8> %a, i32 15
%conv2 = sext i8 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <4 x i32> from the sign-extended halfwords 1, 3, 5 and 7 of %a.
; Big-endian expectation: a single vextsh2w.
; Little-endian expectation: a vsldoi of %a with itself (shift amount 14)
; precedes the vextsh2w.
define <4 x i32> @vextsh2wBE(<8 x i16> %a) {
; CHECK-LE-LABEL: vextsh2wBE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vsldoi 2, 2, 2, 14
; CHECK-LE-NEXT: vextsh2w 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsh2wBE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vextsh2w 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i32 lane; source halfword index = 2 * lane + 1.
%vecext = extractelement <8 x i16> %a, i32 1
%conv = sext i16 %vecext to i32
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
%vecext1 = extractelement <8 x i16> %a, i32 3
%conv2 = sext i16 %vecext1 to i32
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
%vecext4 = extractelement <8 x i16> %a, i32 5
%conv5 = sext i16 %vecext4 to i32
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
%vecext7 = extractelement <8 x i16> %a, i32 7
%conv8 = sext i16 %vecext7 to i32
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
ret <4 x i32> %vecinit9
}
; Builds a <2 x i64> from the sign-extended halfwords 3 and 7 of %a.
; Big-endian expectation: a single vextsh2d.
; Little-endian expectation: a vsldoi of %a with itself (shift amount 10)
; precedes the vextsh2d.
define <2 x i64> @vextsh2dBE(<8 x i16> %a) {
; CHECK-LE-LABEL: vextsh2dBE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vsldoi 2, 2, 2, 10
; CHECK-LE-NEXT: vextsh2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsh2dBE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vextsh2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source halfword index = 4 * lane + 3.
%vecext = extractelement <8 x i16> %a, i32 3
%conv = sext i16 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <8 x i16> %a, i32 7
%conv2 = sext i16 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <2 x i64> from the sign-extended words 1 and 3 of %a.
; Big-endian expectation: a single vextsw2d.
; Little-endian expectation: a vsldoi of %a with itself (shift amount 12)
; precedes the vextsw2d.
define <2 x i64> @vextsw2dBE(<4 x i32> %a) {
; CHECK-LE-LABEL: vextsw2dBE:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: vsldoi 2, 2, 2, 12
; CHECK-LE-NEXT: vextsw2d 2, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextsw2dBE:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vextsw2d 2, 2
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i64 lane; source word index = 2 * lane + 1.
%vecext = extractelement <4 x i32> %a, i32 1
%conv = sext i32 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x i32> %a, i32 3
%conv2 = sext i32 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds a <2 x i64> whose lanes come from two different source vectors:
; lane 0 is the sign-extended word 0 of %a, lane 1 the sign-extended word 2 of %b.
; Neither run expects a vector vextsw2d here: both expected sequences move each
; word to a GPR, sign-extend it with extsw, and rebuild the vector with mtvsrdd.
define <2 x i64> @vextDiffVectors(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LE-LABEL: vextDiffVectors:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li 3, 0
; CHECK-LE-NEXT: mfvsrwz 4, 35
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: extsw 4, 4
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: mtvsrdd 34, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vextDiffVectors:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 3, 0
; CHECK-BE-NEXT: li 4, 8
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: vextuwlx 4, 4, 3
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: extsw 4, 4
; CHECK-BE-NEXT: mtvsrdd 34, 3, 4
; CHECK-BE-NEXT: blr
entry:
; Lane 0 reads from %a, lane 1 reads from %b.
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sext i32 %vecext to i64
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
%vecext1 = extractelement <4 x i32> %b, i32 2
%conv2 = sext i32 %vecext1 to i64
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
; Builds an <8 x i16> from the sign-extended even-indexed bytes (0, 2, ..., 14)
; of %a, i.e. a byte-to-halfword extension.
; Neither run expects a vector vexts* instruction: both expected sequences
; extract each byte to a GPR (vextubrx / vextublx), sign-extend it with extsb,
; move it back to a vector register and merge the pieces together.
; NOTE(review): presumably "invalid" because there is no vector sign-extend
; from byte to halfword to select -- confirm against the Power ISA.
define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
; CHECK-LE-LABEL: testInvalidExtend:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li 3, 0
; CHECK-LE-NEXT: li 4, 2
; CHECK-LE-NEXT: li 5, 4
; CHECK-LE-NEXT: li 6, 6
; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: vextubrx 4, 4, 2
; CHECK-LE-NEXT: vextubrx 5, 5, 2
; CHECK-LE-NEXT: vextubrx 6, 6, 2
; CHECK-LE-NEXT: li 7, 8
; CHECK-LE-NEXT: li 8, 10
; CHECK-LE-NEXT: li 9, 12
; CHECK-LE-NEXT: li 10, 14
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: extsb 4, 4
; CHECK-LE-NEXT: extsb 5, 5
; CHECK-LE-NEXT: extsb 6, 6
; CHECK-LE-NEXT: vextubrx 7, 7, 2
; CHECK-LE-NEXT: vextubrx 8, 8, 2
; CHECK-LE-NEXT: extsb 7, 7
; CHECK-LE-NEXT: extsb 8, 8
; CHECK-LE-NEXT: mtvsrd 35, 4
; CHECK-LE-NEXT: vextubrx 9, 9, 2
; CHECK-LE-NEXT: vextubrx 10, 10, 2
; CHECK-LE-NEXT: mtvsrd 34, 3
; CHECK-LE-NEXT: mtvsrd 36, 6
; CHECK-LE-NEXT: extsb 9, 9
; CHECK-LE-NEXT: extsb 10, 10
; CHECK-LE-NEXT: vmrghh 2, 3, 2
; CHECK-LE-NEXT: mtvsrd 35, 5
; CHECK-LE-NEXT: vmrghh 3, 4, 3
; CHECK-LE-NEXT: mtvsrd 36, 10
; CHECK-LE-NEXT: xxmrglw 0, 35, 34
; CHECK-LE-NEXT: mtvsrd 34, 7
; CHECK-LE-NEXT: mtvsrd 35, 8
; CHECK-LE-NEXT: vmrghh 2, 3, 2
; CHECK-LE-NEXT: mtvsrd 35, 9
; CHECK-LE-NEXT: vmrghh 3, 4, 3
; CHECK-LE-NEXT: xxmrglw 1, 35, 34
; CHECK-LE-NEXT: xxmrgld 34, 1, 0
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: testInvalidExtend:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: li 9, 12
; CHECK-BE-NEXT: li 10, 14
; CHECK-BE-NEXT: li 7, 8
; CHECK-BE-NEXT: li 8, 10
; CHECK-BE-NEXT: vextublx 9, 9, 2
; CHECK-BE-NEXT: vextublx 10, 10, 2
; CHECK-BE-NEXT: vextublx 7, 7, 2
; CHECK-BE-NEXT: vextublx 8, 8, 2
; CHECK-BE-NEXT: li 5, 4
; CHECK-BE-NEXT: li 6, 6
; CHECK-BE-NEXT: li 3, 0
; CHECK-BE-NEXT: li 4, 2
; CHECK-BE-NEXT: extsb 9, 9
; CHECK-BE-NEXT: extsb 10, 10
; CHECK-BE-NEXT: extsb 7, 7
; CHECK-BE-NEXT: extsb 8, 8
; CHECK-BE-NEXT: vextublx 5, 5, 2
; CHECK-BE-NEXT: vextublx 6, 6, 2
; CHECK-BE-NEXT: extsb 5, 5
; CHECK-BE-NEXT: extsb 6, 6
; CHECK-BE-NEXT: mtfprwz 1, 9
; CHECK-BE-NEXT: addis 9, 2, .LCPI11_0@toc@ha
; CHECK-BE-NEXT: mtfprwz 0, 10
; CHECK-BE-NEXT: mtfprwz 3, 7
; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: mtfprwz 4, 3
; CHECK-BE-NEXT: addi 9, 9, .LCPI11_0@toc@l
; CHECK-BE-NEXT: vextublx 4, 4, 2
; CHECK-BE-NEXT: extsb 4, 4
; CHECK-BE-NEXT: lxv 2, 0(9)
; CHECK-BE-NEXT: xxperm 0, 1, 2
; CHECK-BE-NEXT: mtfprwz 1, 8
; CHECK-BE-NEXT: xxperm 1, 3, 2
; CHECK-BE-NEXT: mtfprwz 3, 5
; CHECK-BE-NEXT: xxmrghw 0, 1, 0
; CHECK-BE-NEXT: mtfprwz 1, 6
; CHECK-BE-NEXT: xxperm 1, 3, 2
; CHECK-BE-NEXT: mtfprwz 3, 4
; CHECK-BE-NEXT: xxperm 3, 4, 2
; CHECK-BE-NEXT: xxmrghw 1, 3, 1
; CHECK-BE-NEXT: xxmrghd 34, 1, 0
; CHECK-BE-NEXT: blr
entry:
; Each extract/sext/insert triple fills one i16 lane; source byte index = 2 * lane.
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i16
%vecinit = insertelement <8 x i16> undef, i16 %conv, i32 0
%vecext1 = extractelement <16 x i8> %a, i32 2
%conv2 = sext i8 %vecext1 to i16
%vecinit3 = insertelement <8 x i16> %vecinit, i16 %conv2, i32 1
%vecext4 = extractelement <16 x i8> %a, i32 4
%conv5 = sext i8 %vecext4 to i16
%vecinit6 = insertelement <8 x i16> %vecinit3, i16 %conv5, i32 2
%vecext7 = extractelement <16 x i8> %a, i32 6
%conv8 = sext i8 %vecext7 to i16
%vecinit9 = insertelement <8 x i16> %vecinit6, i16 %conv8, i32 3
%vecext10 = extractelement <16 x i8> %a, i32 8
%conv11 = sext i8 %vecext10 to i16
%vecinit12 = insertelement <8 x i16> %vecinit9, i16 %conv11, i32 4
%vecext13 = extractelement <16 x i8> %a, i32 10
%conv14 = sext i8 %vecext13 to i16
%vecinit15 = insertelement <8 x i16> %vecinit12, i16 %conv14, i32 5
%vecext16 = extractelement <16 x i8> %a, i32 12
%conv17 = sext i8 %vecext16 to i16
%vecinit18 = insertelement <8 x i16> %vecinit15, i16 %conv17, i32 6
%vecext19 = extractelement <16 x i8> %a, i32 14
%conv20 = sext i8 %vecext19 to i16
%vecinit21 = insertelement <8 x i16> %vecinit18, i16 %conv20, i32 7
ret <8 x i16> %vecinit21
}