[PowerPC] Add SDNPMemOperand to some nodes (#115580)

Nodes created with `getMemIntrinsicNode` have memory operands. In order
for these memory operands to be propagated to machine instructions, the
nodes should have the `SDNPMemOperand` property.

Similar to 3c8c385a.
This commit is contained in:
Sergei Barannikov 2024-11-15 20:36:56 +03:00 committed by GitHub
parent e9e8f59dd4
commit 032014ef10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 448 additions and 273 deletions

View File

@ -166,17 +166,17 @@ def PPCany_fcfidus : PatFrags<(ops node:$op),
def PPCstore_scal_int_from_vsr:
SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
[SDNPHasChain, SDNPMayLoad]>;
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
// Extract FPSCR (not modeled at the DAG level).
@ -376,7 +376,7 @@ def PPCatomicCmpSwap_16 :
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCStoreCond : SDNode<"PPCISD::STORE_COND", SDT_StoreCond,
[SDNPHasChain, SDNPMayStore,
SDNPMemOperand, SDNPOutGlue]>;

View File

@ -105,7 +105,7 @@ def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
// PPC Specific DAG Nodes.
def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
[SDNPHasChain, SDNPMayLoad]>;
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,

View File

@ -90,11 +90,11 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
[SDNPHasChain, SDNPMayStore]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;

View File

@ -55,9 +55,9 @@ define dso_local void @foo1_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1798
; P8-LE-NEXT: ori 4, 4, 1284
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 2312
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@ -143,9 +143,9 @@ define dso_local void @foo2_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2826
; P8-LE-NEXT: ori 4, 4, 2312
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3340
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@ -231,9 +231,9 @@ define dso_local void @foo3_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1543
; P8-LE-NEXT: ori 4, 4, 1029
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 2057
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@ -313,9 +313,9 @@ define dso_local void @foo4_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2571
; P8-LE-NEXT: ori 4, 4, 2057
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3085
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@ -389,8 +389,8 @@ define dso_local void @foo5_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1029
; P8-LE-NEXT: ori 4, 4, 1543
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo5_int_be_reuse4B:
@ -455,8 +455,8 @@ define dso_local void @foo6_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2057
; P8-LE-NEXT: ori 4, 4, 2571
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo6_int_le_reuse4B:
@ -1221,8 +1221,8 @@ define dso_local void @foo15_int_noreuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1029
; P8-LE-NEXT: ori 4, 4, 1544
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo15_int_noreuse4B:
@ -1371,8 +1371,8 @@ define dso_local void @foo17_fp_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16673
; P8-LE-NEXT: ori 4, 4, 39322
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo17_fp_be_reuse4B:
@ -1437,8 +1437,8 @@ define dso_local void @foo18_fp_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16675
; P8-LE-NEXT: ori 4, 4, 13107
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo18_fp_le_reuse4B:
@ -1504,8 +1504,8 @@ define dso_local void @foo19_fp_be_reuse8B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 4105
; P8-LE-NEXT: rldic 4, 4, 50, 1
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo19_fp_be_reuse8B:
@ -1649,8 +1649,8 @@ define dso_local void @foo21_fp_noreuse4B(ptr nocapture noundef writeonly %a) lo
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16268
; P8-LE-NEXT: ori 4, 4, 52430
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo21_fp_noreuse4B:
@ -1716,8 +1716,8 @@ define dso_local void @foo22_fp_noreuse8B(ptr nocapture noundef writeonly %a) lo
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 21503
; P8-LE-NEXT: rotldi 4, 4, 52
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo22_fp_noreuse8B:

View File

@ -45,8 +45,8 @@ define dso_local void @foo1(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 3333
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo1:
@ -109,8 +109,8 @@ define dso_local void @foo2(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 3333
; P8-LE-NEXT: ori 4, 4, 3333
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo2:
@ -182,9 +182,9 @@ define dso_local void @foo3(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 3333
; P8-LE-NEXT: ori 4, 4, 3333
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3333
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@ -334,8 +334,8 @@ define dso_local void @foo5(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 5
; P8-LE-NEXT: ori 4, 4, 5653
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo5:
@ -473,8 +473,8 @@ define dso_local void @foo7(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 508
; P8-LE-NEXT: ori 4, 4, 41045
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo7:
@ -539,8 +539,8 @@ define dso_local void @foo8(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16469
; P8-LE-NEXT: ori 4, 4, 7864
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo8:

View File

@ -574,13 +574,13 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, ptr nocapture %
; CHECK-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: li r3, 16
; CHECK-NEXT: stfiwx f1, r5, r3
; CHECK-NEXT: li r3, 20
; CHECK-NEXT: stxsiwx vs34, r5, r3
; CHECK-NEXT: xxswapd vs35, vs0
; CHECK-NEXT: vperm v3, v2, v2, v3
; CHECK-NEXT: xxswapd vs0, vs35
; CHECK-NEXT: stxvd2x vs0, 0, r5
; CHECK-NEXT: stfiwx f1, r5, r3
; CHECK-NEXT: li r3, 20
; CHECK-NEXT: stxsiwx vs34, r5, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_stores_exceed_vec_size:

View File

@ -39,10 +39,10 @@ define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
; CHECK-P8-NEXT: vmr v3, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@ -95,10 +95,10 @@ define void @qpFmadd_02(ptr nocapture readonly %a,
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@ -214,8 +214,8 @@ define void @qpFnmadd(ptr nocapture readonly %a,
; CHECK-P8-NEXT: stb r4, 63(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)
@ -331,10 +331,10 @@ define void @qpFmsub(ptr nocapture readonly %a,
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@ -451,8 +451,8 @@ define void @qpFnmsub(ptr nocapture readonly %a,
; CHECK-P8-NEXT: stb r4, 63(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)

View File

@ -576,13 +576,13 @@ define void @mixParam_03(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
; CHECK-P8-NEXT: ld r4, 184(r1)
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: stxvd2x vs0, 0, r9
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: mr r3, r10
; CHECK-P8-NEXT: stxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r9
@ -639,15 +639,15 @@ define fastcc void @mixParam_03f(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: mr r3, r5
; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r7
; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __floatsikf

View File

@ -1,12 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; This code causes an assertion failure if the dereferenceable flag is not properly set when merging consecutive stores
; CHECK-LABEL: func:
; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOT: lxvd2x
; CHECK: stxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
define <2 x i64> @func(ptr %pdst) {
; CHECK-LABEL: func:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi 4, 1, -32
; CHECK-NEXT: lxvd2x 0, 0, 4
; CHECK-NEXT: xxswapd 34, 0
; CHECK-NEXT: lxvd2x 0, 0, 4
; CHECK-NEXT: stxvd2x 0, 0, 3
; CHECK-NEXT: blr
entry:
%a = alloca [4 x i64], align 8
%psrc1 = getelementptr inbounds i64, ptr %a, i64 1

View File

@ -23,9 +23,9 @@ define dso_local void @g(ptr %agg.result) local_unnamed_addr #0 {
; CHECK-NEXT: ld r7, 24(r5)
; CHECK-NEXT: std r7, 24(r3)
; CHECK-NEXT: ld r5, 32(r5)
; CHECK-NEXT: std r5, 32(r3)
; CHECK-NEXT: stwbrx r4, 0, r3
; CHECK-NEXT: li r4, 20
; CHECK-NEXT: std r5, 32(r3)
; CHECK-NEXT: stwbrx r6, r3, r4
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)

View File

@ -55,9 +55,9 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-NEXT: std r6, 56(r3)
; CHECK-NEXT: rotldi r6, r7, 1
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: rldimi r6, r5, 1, 0
; CHECK-NEXT: std r6, 64(r3)
; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8

View File

@ -33,13 +33,13 @@ define void @pr59074(ptr %0) {
; LE32-NEXT: li 8, 12
; LE32-NEXT: xxswapd 0, 0
; LE32-NEXT: rlwimi 5, 6, 0, 30, 28
; LE32-NEXT: addi 4, 4, -12
; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
; LE32-NEXT: stxvd2x 0, 0, 5
; LE32-NEXT: stw 7, 44(1)
; LE32-NEXT: addi 4, 4, -12
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
; LE32-NEXT: stxvd2x 0, 0, 5
; LE32-NEXT: clrlwi 4, 4, 27
; LE32-NEXT: lwzux 5, 9, 6
; LE32-NEXT: lwz 6, 8(9)

View File

@ -1,9 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
; RUN: -check-prefix=CHECK-P8 %s
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
; RUN: -check-prefix=NOOPTSWAP %s
; RUN: -check-prefix=NOOPTSWAP-P8 %s
; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr < %s | FileCheck \
@ -11,7 +14,7 @@
; RUN: llc -O3 -mcpu=pwr9 -disable-ppc-vsx-swap-removal -mattr=-power9-vector \
; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck -check-prefix=NOOPTSWAP %s
; RUN: | FileCheck -check-prefix=NOOPTSWAP-P9 %s
; LH: 2016-11-17
; Updated align attribute from 16 to 8 to keep the swap instruction tests.
@ -41,6 +44,250 @@
@ca = common global [4096 x i32] zeroinitializer, align 8
define void @foo() {
; CHECK-P8-LABEL: foo:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li 3, 256
; CHECK-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-P8-NEXT: addis 4, 2, .LC0@toc@ha
; CHECK-P8-NEXT: addis 5, 2, .LC1@toc@ha
; CHECK-P8-NEXT: addis 6, 2, .LC2@toc@ha
; CHECK-P8-NEXT: addis 7, 2, .LC3@toc@ha
; CHECK-P8-NEXT: li 8, 16
; CHECK-P8-NEXT: li 9, 32
; CHECK-P8-NEXT: mtctr 3
; CHECK-P8-NEXT: ld 4, .LC0@toc@l(4)
; CHECK-P8-NEXT: ld 5, .LC1@toc@l(5)
; CHECK-P8-NEXT: ld 6, .LC2@toc@l(6)
; CHECK-P8-NEXT: ld 7, .LC3@toc@l(7)
; CHECK-P8-NEXT: li 3, 0
; CHECK-P8-NEXT: li 10, 48
; CHECK-P8-NEXT: .p2align 4
; CHECK-P8-NEXT: .LBB0_1: # %vector.body
; CHECK-P8-NEXT: #
; CHECK-P8-NEXT: lxvd2x 34, 4, 3
; CHECK-P8-NEXT: lxvd2x 35, 5, 3
; CHECK-P8-NEXT: add 11, 4, 3
; CHECK-P8-NEXT: add 12, 5, 3
; CHECK-P8-NEXT: lxvd2x 36, 6, 3
; CHECK-P8-NEXT: add 30, 6, 3
; CHECK-P8-NEXT: lxvd2x 37, 11, 8
; CHECK-P8-NEXT: lxvd2x 32, 12, 10
; CHECK-P8-NEXT: vadduwm 2, 3, 2
; CHECK-P8-NEXT: lxvd2x 35, 12, 8
; CHECK-P8-NEXT: vmuluwm 2, 2, 4
; CHECK-P8-NEXT: lxvd2x 36, 11, 9
; CHECK-P8-NEXT: vadduwm 3, 3, 5
; CHECK-P8-NEXT: lxvd2x 37, 12, 9
; CHECK-P8-NEXT: stxvd2x 34, 7, 3
; CHECK-P8-NEXT: lxvd2x 34, 30, 10
; CHECK-P8-NEXT: vadduwm 4, 5, 4
; CHECK-P8-NEXT: lxvd2x 37, 11, 10
; CHECK-P8-NEXT: add 11, 7, 3
; CHECK-P8-NEXT: addi 3, 3, 64
; CHECK-P8-NEXT: vadduwm 5, 0, 5
; CHECK-P8-NEXT: lxvd2x 32, 30, 8
; CHECK-P8-NEXT: vmuluwm 2, 5, 2
; CHECK-P8-NEXT: vmuluwm 3, 3, 0
; CHECK-P8-NEXT: lxvd2x 32, 30, 9
; CHECK-P8-NEXT: stxvd2x 34, 11, 10
; CHECK-P8-NEXT: vmuluwm 4, 4, 0
; CHECK-P8-NEXT: stxvd2x 35, 11, 8
; CHECK-P8-NEXT: stxvd2x 36, 11, 9
; CHECK-P8-NEXT: bdnz .LBB0_1
; CHECK-P8-NEXT: # %bb.2: # %for.end
; CHECK-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-P8-NEXT: blr
;
; NOOPTSWAP-P8-LABEL: foo:
; NOOPTSWAP-P8: # %bb.0: # %entry
; NOOPTSWAP-P8-NEXT: li 3, 256
; NOOPTSWAP-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; NOOPTSWAP-P8-NEXT: addis 4, 2, .LC0@toc@ha
; NOOPTSWAP-P8-NEXT: addis 5, 2, .LC1@toc@ha
; NOOPTSWAP-P8-NEXT: addis 6, 2, .LC2@toc@ha
; NOOPTSWAP-P8-NEXT: addis 7, 2, .LC3@toc@ha
; NOOPTSWAP-P8-NEXT: li 8, 16
; NOOPTSWAP-P8-NEXT: li 9, 32
; NOOPTSWAP-P8-NEXT: mtctr 3
; NOOPTSWAP-P8-NEXT: ld 4, .LC0@toc@l(4)
; NOOPTSWAP-P8-NEXT: ld 5, .LC1@toc@l(5)
; NOOPTSWAP-P8-NEXT: ld 6, .LC2@toc@l(6)
; NOOPTSWAP-P8-NEXT: ld 7, .LC3@toc@l(7)
; NOOPTSWAP-P8-NEXT: li 3, 0
; NOOPTSWAP-P8-NEXT: li 10, 48
; NOOPTSWAP-P8-NEXT: .p2align 4
; NOOPTSWAP-P8-NEXT: .LBB0_1: # %vector.body
; NOOPTSWAP-P8-NEXT: #
; NOOPTSWAP-P8-NEXT: lxvd2x 0, 4, 3
; NOOPTSWAP-P8-NEXT: lxvd2x 1, 5, 3
; NOOPTSWAP-P8-NEXT: add 30, 6, 3
; NOOPTSWAP-P8-NEXT: add 11, 4, 3
; NOOPTSWAP-P8-NEXT: add 12, 5, 3
; NOOPTSWAP-P8-NEXT: lxvd2x 2, 11, 8
; NOOPTSWAP-P8-NEXT: lxvd2x 3, 12, 8
; NOOPTSWAP-P8-NEXT: lxvd2x 4, 11, 9
; NOOPTSWAP-P8-NEXT: lxvd2x 5, 12, 9
; NOOPTSWAP-P8-NEXT: lxvd2x 6, 11, 10
; NOOPTSWAP-P8-NEXT: add 11, 7, 3
; NOOPTSWAP-P8-NEXT: lxvd2x 7, 12, 10
; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
; NOOPTSWAP-P8-NEXT: lxvd2x 0, 6, 3
; NOOPTSWAP-P8-NEXT: xxswapd 35, 1
; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 8
; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
; NOOPTSWAP-P8-NEXT: xxswapd 36, 2
; NOOPTSWAP-P8-NEXT: xxswapd 32, 4
; NOOPTSWAP-P8-NEXT: xxswapd 38, 6
; NOOPTSWAP-P8-NEXT: xxswapd 37, 3
; NOOPTSWAP-P8-NEXT: xxswapd 33, 5
; NOOPTSWAP-P8-NEXT: xxswapd 39, 7
; NOOPTSWAP-P8-NEXT: vadduwm 3, 5, 4
; NOOPTSWAP-P8-NEXT: vadduwm 4, 1, 0
; NOOPTSWAP-P8-NEXT: xxswapd 40, 0
; NOOPTSWAP-P8-NEXT: xxswapd 41, 1
; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 9
; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 10
; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 8
; NOOPTSWAP-P8-NEXT: vmuluwm 3, 3, 9
; NOOPTSWAP-P8-NEXT: xxswapd 42, 0
; NOOPTSWAP-P8-NEXT: xxswapd 43, 1
; NOOPTSWAP-P8-NEXT: vmuluwm 4, 4, 10
; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
; NOOPTSWAP-P8-NEXT: vadduwm 2, 7, 6
; NOOPTSWAP-P8-NEXT: xxswapd 1, 35
; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 11
; NOOPTSWAP-P8-NEXT: stxvd2x 0, 7, 3
; NOOPTSWAP-P8-NEXT: addi 3, 3, 64
; NOOPTSWAP-P8-NEXT: stxvd2x 1, 11, 8
; NOOPTSWAP-P8-NEXT: xxswapd 2, 36
; NOOPTSWAP-P8-NEXT: stxvd2x 2, 11, 9
; NOOPTSWAP-P8-NEXT: xxswapd 3, 34
; NOOPTSWAP-P8-NEXT: stxvd2x 3, 11, 10
; NOOPTSWAP-P8-NEXT: bdnz .LBB0_1
; NOOPTSWAP-P8-NEXT: # %bb.2: # %for.end
; NOOPTSWAP-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; NOOPTSWAP-P8-NEXT: blr
;
; CHECK-P9-LABEL: foo:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: li 6, 256
; CHECK-P9-NEXT: addis 3, 2, .LC0@toc@ha
; CHECK-P9-NEXT: addis 4, 2, .LC1@toc@ha
; CHECK-P9-NEXT: addis 5, 2, .LC2@toc@ha
; CHECK-P9-NEXT: mtctr 6
; CHECK-P9-NEXT: addis 6, 2, .LC3@toc@ha
; CHECK-P9-NEXT: ld 3, .LC0@toc@l(3)
; CHECK-P9-NEXT: ld 4, .LC1@toc@l(4)
; CHECK-P9-NEXT: ld 5, .LC2@toc@l(5)
; CHECK-P9-NEXT: ld 6, .LC3@toc@l(6)
; CHECK-P9-NEXT: addi 3, 3, 32
; CHECK-P9-NEXT: addi 4, 4, 32
; CHECK-P9-NEXT: addi 5, 5, 32
; CHECK-P9-NEXT: addi 6, 6, 32
; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: lxv 2, -32(6)
; CHECK-P9-NEXT: lxv 3, -32(5)
; CHECK-P9-NEXT: lxv 4, -16(5)
; CHECK-P9-NEXT: vadduwm 2, 3, 2
; CHECK-P9-NEXT: lxv 3, -32(4)
; CHECK-P9-NEXT: vmuluwm 2, 2, 3
; CHECK-P9-NEXT: lxv 3, -16(6)
; CHECK-P9-NEXT: vadduwm 3, 4, 3
; CHECK-P9-NEXT: lxv 4, 0(5)
; CHECK-P9-NEXT: stxv 2, -32(3)
; CHECK-P9-NEXT: lxv 2, -16(4)
; CHECK-P9-NEXT: vmuluwm 2, 3, 2
; CHECK-P9-NEXT: lxv 3, 0(6)
; CHECK-P9-NEXT: vadduwm 3, 4, 3
; CHECK-P9-NEXT: lxv 4, 16(5)
; CHECK-P9-NEXT: addi 5, 5, 64
; CHECK-P9-NEXT: stxv 2, -16(3)
; CHECK-P9-NEXT: lxv 2, 0(4)
; CHECK-P9-NEXT: vmuluwm 2, 3, 2
; CHECK-P9-NEXT: lxv 3, 16(6)
; CHECK-P9-NEXT: addi 6, 6, 64
; CHECK-P9-NEXT: vadduwm 3, 4, 3
; CHECK-P9-NEXT: stxv 2, 0(3)
; CHECK-P9-NEXT: lxv 2, 16(4)
; CHECK-P9-NEXT: addi 4, 4, 64
; CHECK-P9-NEXT: vmuluwm 2, 3, 2
; CHECK-P9-NEXT: stxv 2, 16(3)
; CHECK-P9-NEXT: addi 3, 3, 64
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %for.end
; CHECK-P9-NEXT: blr
;
; NOOPTSWAP-P9-LABEL: foo:
; NOOPTSWAP-P9: # %bb.0: # %entry
; NOOPTSWAP-P9-NEXT: addis 4, 2, .LC0@toc@ha
; NOOPTSWAP-P9-NEXT: addis 5, 2, .LC1@toc@ha
; NOOPTSWAP-P9-NEXT: addis 6, 2, .LC2@toc@ha
; NOOPTSWAP-P9-NEXT: addis 7, 2, .LC3@toc@ha
; NOOPTSWAP-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill
; NOOPTSWAP-P9-NEXT: ld 4, .LC0@toc@l(4)
; NOOPTSWAP-P9-NEXT: li 3, 256
; NOOPTSWAP-P9-NEXT: ld 5, .LC1@toc@l(5)
; NOOPTSWAP-P9-NEXT: ld 6, .LC2@toc@l(6)
; NOOPTSWAP-P9-NEXT: ld 7, .LC3@toc@l(7)
; NOOPTSWAP-P9-NEXT: mtctr 3
; NOOPTSWAP-P9-NEXT: li 3, 0
; NOOPTSWAP-P9-NEXT: li 8, 16
; NOOPTSWAP-P9-NEXT: li 9, 32
; NOOPTSWAP-P9-NEXT: li 10, 48
; NOOPTSWAP-P9-NEXT: .p2align 4
; NOOPTSWAP-P9-NEXT: .LBB0_1: # %vector.body
; NOOPTSWAP-P9-NEXT: #
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 4, 3
; NOOPTSWAP-P9-NEXT: lxvd2x 1, 5, 3
; NOOPTSWAP-P9-NEXT: lxvd2x 2, 6, 3
; NOOPTSWAP-P9-NEXT: add 12, 5, 3
; NOOPTSWAP-P9-NEXT: add 11, 4, 3
; NOOPTSWAP-P9-NEXT: add 30, 6, 3
; NOOPTSWAP-P9-NEXT: lxvd2x 3, 11, 8
; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
; NOOPTSWAP-P9-NEXT: xxswapd 35, 1
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 8
; NOOPTSWAP-P9-NEXT: xxswapd 36, 2
; NOOPTSWAP-P9-NEXT: lxvd2x 1, 11, 9
; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
; NOOPTSWAP-P9-NEXT: xxswapd 35, 3
; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 4
; NOOPTSWAP-P9-NEXT: xxswapd 36, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 9
; NOOPTSWAP-P9-NEXT: vadduwm 3, 4, 3
; NOOPTSWAP-P9-NEXT: xxswapd 36, 1
; NOOPTSWAP-P9-NEXT: lxvd2x 1, 12, 10
; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 10
; NOOPTSWAP-P9-NEXT: add 11, 7, 3
; NOOPTSWAP-P9-NEXT: vadduwm 4, 5, 4
; NOOPTSWAP-P9-NEXT: xxswapd 32, 1
; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 8
; NOOPTSWAP-P9-NEXT: vadduwm 5, 0, 5
; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 9
; NOOPTSWAP-P9-NEXT: vmuluwm 3, 3, 0
; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
; NOOPTSWAP-P9-NEXT: vmuluwm 4, 4, 0
; NOOPTSWAP-P9-NEXT: stxvd2x 0, 7, 3
; NOOPTSWAP-P9-NEXT: addi 3, 3, 64
; NOOPTSWAP-P9-NEXT: xxswapd 1, 35
; NOOPTSWAP-P9-NEXT: stxvd2x 1, 11, 8
; NOOPTSWAP-P9-NEXT: xxswapd 0, 36
; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 9
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 10
; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
; NOOPTSWAP-P9-NEXT: vmuluwm 2, 5, 2
; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 10
; NOOPTSWAP-P9-NEXT: bdnz .LBB0_1
; NOOPTSWAP-P9-NEXT: # %bb.2: # %for.end
; NOOPTSWAP-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; NOOPTSWAP-P9-NEXT: blr
entry:
br label %vector.body
@ -96,80 +343,3 @@ vector.body:
for.end:
ret void
}
; CHECK-LABEL: @foo
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK-P9-NOT: xxpermdi
; CHECK: lxvd2x
; CHECK: lxvd2x
; CHECK-DAG: lxvd2x
; CHECK-DAG: vadduwm
; CHECK: vmuluwm
; CHECK: stxvd2x
; CHECK: lxvd2x
; CHECK: lxvd2x
; CHECK-DAG: lxvd2x
; CHECK-DAG: vadduwm
; CHECK: vmuluwm
; CHECK: stxvd2x
; CHECK: lxvd2x
; CHECK: lxvd2x
; CHECK-DAG: lxvd2x
; CHECK-DAG: vadduwm
; CHECK: vmuluwm
; CHECK: stxvd2x
; CHECK: lxvd2x
; CHECK: lxvd2x
; CHECK-DAG: lxvd2x
; CHECK-DAG: vadduwm
; CHECK: vmuluwm
; CHECK: stxvd2x
; NOOPTSWAP-LABEL: @foo
; NOOPTSWAP: lxvd2x
; NOOPTSWAP-DAG: lxvd2x
; NOOPTSWAP-DAG: lxvd2x
; NOOPTSWAP-DAG: xxswapd
; NOOPTSWAP-DAG: xxswapd
; NOOPTSWAP-DAG: xxswapd
; NOOPTSWAP-DAG: vadduwm
; NOOPTSWAP: vmuluwm
; NOOPTSWAP: xxswapd
; NOOPTSWAP-DAG: xxswapd
; NOOPTSWAP-DAG: xxswapd
; NOOPTSWAP-DAG: stxvd2x
; NOOPTSWAP-DAG: stxvd2x
; NOOPTSWAP: stxvd2x
; CHECK-P9-LABEL: @foo
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: lxv
; CHECK-P9-DAG: vadduwm
; CHECK-P9-DAG: vadduwm
; CHECK-P9-DAG: vadduwm
; CHECK-P9-DAG: vadduwm
; CHECK-P9-DAG: vmuluwm
; CHECK-P9-DAG: vmuluwm
; CHECK-P9-DAG: vmuluwm
; CHECK-P9-DAG: vmuluwm
; CHECK-P9-DAG: stxv
; CHECK-P9-DAG: stxv
; CHECK-P9-DAG: stxv
; CHECK-P9-DAG: stxv

View File

@ -45,11 +45,11 @@ define void @test8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8:
@ -138,9 +138,9 @@ define void @test4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: vperm v2, v5, v2, v4
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4:
@ -278,10 +278,10 @@ define void @stest8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: stest8:

View File

@ -424,12 +424,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
; CHECK-P8-NEXT: xxmrglw vs0, v3, v0
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v4, v4, v8
; CHECK-P8-NEXT: xxmrglw vs3, v4, v7
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -1056,12 +1056,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
; CHECK-P8-NEXT: xxmrglw vs0, v3, v0
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v4, v4, v8
; CHECK-P8-NEXT: xxmrglw vs3, v4, v7
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -52,9 +52,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x i64>) %agg.result, <4 x fl
; CHECK-P8-NEXT: xvcvspdp vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v3, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@ -113,9 +113,9 @@ define void @test8elt(ptr noalias nocapture sret(<8 x i64>) %agg.result, ptr noc
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, v3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -206,6 +206,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr n
; CHECK-P8-NEXT: xvcvdpuxds v2, vs4
; CHECK-P8-NEXT: xvcvdpuxds v3, vs5
; CHECK-P8-NEXT: xxswapd vs4, v1
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xxswapd vs1, v0
@ -218,13 +219,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr n
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: xxswapd vs3, v2
; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs2, v4
; CHECK-P8-NEXT: xxswapd vs5, v0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
@ -357,9 +357,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x i64>) %agg.result,
; CHECK-P8-NEXT: xvcvspdp vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v3, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@ -418,9 +418,9 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x i64>) %agg.result,
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, v3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@ -511,6 +511,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result
; CHECK-P8-NEXT: xvcvdpuxds v2, vs4
; CHECK-P8-NEXT: xvcvdpuxds v3, vs5
; CHECK-P8-NEXT: xxswapd vs4, v1
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xxswapd vs1, v0
@ -523,13 +524,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: xxswapd vs3, v2
; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs2, v4
; CHECK-P8-NEXT: xxswapd vs5, v0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;

View File

@ -371,12 +371,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v7, v7, v8
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -918,12 +918,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v7, v7, v8
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -190,11 +190,11 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr n
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: vmrgew v4, v5, v4
; CHECK-P8-NEXT: xvcvdpuxws v5, vs1
; CHECK-P8-NEXT: stxvd2x v4, r3, r6
; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v3, v3, v0
; CHECK-P8-NEXT: xvcvdpuxws v0, vs4
; CHECK-P8-NEXT: stxvd2x v3, r3, r7
; CHECK-P8-NEXT: stxvd2x v4, r3, r6
; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v5, v0, v5
; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr
@ -458,11 +458,11 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i32>) %agg.result
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: vmrgew v4, v5, v4
; CHECK-P8-NEXT: xvcvdpsxws v5, vs1
; CHECK-P8-NEXT: stxvd2x v4, r3, r6
; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v3, v3, v0
; CHECK-P8-NEXT: xvcvdpsxws v0, vs4
; CHECK-P8-NEXT: stxvd2x v3, r3, r7
; CHECK-P8-NEXT: stxvd2x v4, r3, r6
; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v5, v0, v5
; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr

View File

@ -68,8 +68,8 @@ define void @test8elt(ptr noalias nocapture sret(<8 x i32>) %agg.result, ptr noc
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvspuxws vs1, vs1
; CHECK-P8-NEXT: xvcvspuxws vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -215,8 +215,8 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x i32>) %agg.result,
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvspsxws vs1, vs1
; CHECK-P8-NEXT: xvcvspsxws vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:

View File

@ -37,8 +37,8 @@ define void @test4elt(ptr noalias nocapture sret(<4 x i64>) %agg.result, ptr noc
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@ -251,8 +251,8 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x i64>) %agg.result,
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:

View File

@ -166,10 +166,10 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
@ -397,9 +397,9 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -68,9 +68,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i64
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@ -141,6 +141,7 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, <8 x
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vperm v3, v1, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v0, vs2
; CHECK-P8-NEXT: xvcvuxddp vs2, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
@ -153,7 +154,6 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, <8 x
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -257,6 +257,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs7, v8
; CHECK-P8-NEXT: xvcvuxddp vs2, v4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd v7, vs0
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
@ -277,11 +278,10 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -445,9 +445,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: vsrad v2, v2, v4
; CHECK-P8-NEXT: xvcvsxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@ -539,10 +539,10 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@ -684,6 +684,8 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs7, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
@ -693,8 +695,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -185,18 +185,18 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xvcvuxwdp vs0, v6
; CHECK-P8-NEXT: xvcvuxwdp vs1, v5
; CHECK-P8-NEXT: xxswapd vs5, vs6
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -445,18 +445,18 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: xvcvsxwdp vs0, v6
; CHECK-P8-NEXT: xvcvsxwdp vs1, v5
; CHECK-P8-NEXT: xxswapd vs5, vs6
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -211,13 +211,13 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: xxswapd vs2, v2
; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: xxsldwi v6, vs0, vs0, 3
; CHECK-P8-NEXT: xvcvuxdsp vs0, v7
; CHECK-P8-NEXT: xxsldwi v7, vs0, vs0, 3
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: vpkudum v5, v6, v7
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
@ -503,13 +503,13 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: xxswapd vs2, v2
; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: xxsldwi v6, vs0, vs0, 3
; CHECK-P8-NEXT: xvcvsxdsp vs0, v7
; CHECK-P8-NEXT: xxsldwi v7, vs0, vs0, 3
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: vpkudum v5, v6, v7
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3

View File

@ -122,9 +122,9 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, i64 %
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxwsp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -195,6 +195,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vperm v3, v1, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v0, vs2
; CHECK-P8-NEXT: xvcvuxwsp vs2, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
@ -207,7 +208,6 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -398,9 +398,9 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result
; CHECK-P8-NEXT: vsraw v2, v2, v3
; CHECK-P8-NEXT: xvcvsxwsp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@ -480,6 +480,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: vslw v4, v4, v3
; CHECK-P8-NEXT: xxswapd v1, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v1
; CHECK-P8-NEXT: vslw v2, v2, v3
; CHECK-P8-NEXT: vsraw v2, v2, v3
@ -494,7 +495,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -68,9 +68,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i32
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@ -142,6 +142,7 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, i64
; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: vperm v2, v1, v0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v5, vs2
; CHECK-P8-NEXT: xvcvuxddp vs2, v2
; CHECK-P8-NEXT: xxswapd vs1, vs1
@ -154,7 +155,6 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, i64
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -256,6 +256,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, <1
; CHECK-P8-NEXT: xxswapd v1, vs2
; CHECK-P8-NEXT: vperm v1, v0, v2, v1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v6, vs3
; CHECK-P8-NEXT: vperm v6, v0, v2, v6
; CHECK-P8-NEXT: xvcvuxddp vs6, v6
@ -297,7 +298,6 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, <1
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@ -480,9 +480,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: vsrad v2, v2, v4
; CHECK-P8-NEXT: xvcvsxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@ -575,10 +575,10 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@ -733,6 +733,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: stxvd2x vs7, r3, r4
@ -745,7 +746,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:

View File

@ -68,8 +68,8 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, ptr n
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1
; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@ -215,8 +215,8 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1
; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:

View File

@ -37,8 +37,8 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, ptr
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvuxddp vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@ -251,8 +251,8 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvsxddp vs1, vs1
; CHECK-P8-NEXT: xvcvsxddp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:

View File

@ -694,12 +694,12 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 0, 27, 28
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 26, 28
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: subfic 7, 3, 64
; LE-64BIT-NEXT: ld 8, 8(4)
@ -868,12 +868,12 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 2, 27, 28
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: rlwinm 3, 3, 5, 26, 26
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: subfic 7, 3, 64
; LE-64BIT-NEXT: ld 8, 8(4)
@ -1008,14 +1008,14 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: li 4, 32
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: lxvd2x 0, 7, 3
; LE-64BIT-NEXT: add 3, 7, 3
; LE-64BIT-NEXT: lxvd2x 1, 3, 6
; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: blr
;
; BE-LABEL: lshr_32bytes_dwordOff:
@ -1111,37 +1111,37 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
; LE-64BIT-NEXT: addi 8, 1, -32
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: stxvd2x 2, 7, 6
; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 0, 27, 28
; LE-64BIT-NEXT: rlwinm 4, 4, 3, 26, 28
; LE-64BIT-NEXT: neg 3, 3
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: li 6, 32
; LE-64BIT-NEXT: stxvd2x 0, 7, 8
; LE-64BIT-NEXT: xxlxor 0, 0, 0
; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
; LE-64BIT-NEXT: stxvd2x 1, 7, 6
; LE-64BIT-NEXT: stxvd2x 2, 0, 7
; LE-64BIT-NEXT: subfic 6, 4, 64
; LE-64BIT-NEXT: ldux 3, 8, 3
; LE-64BIT-NEXT: ld 7, 16(8)
; LE-64BIT-NEXT: ld 9, 24(8)
; LE-64BIT-NEXT: ld 8, 8(8)
; LE-64BIT-NEXT: srd 10, 7, 6
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: addi 6, 1, -32
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: stxvd2x 1, 7, 8
; LE-64BIT-NEXT: subfic 7, 4, 64
; LE-64BIT-NEXT: ldux 3, 6, 3
; LE-64BIT-NEXT: ld 8, 16(6)
; LE-64BIT-NEXT: ld 9, 24(6)
; LE-64BIT-NEXT: ld 6, 8(6)
; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
; LE-64BIT-NEXT: sld 7, 7, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 8, 6
; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 6, 7
; LE-64BIT-NEXT: srd 7, 3, 7
; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
; LE-64BIT-NEXT: or 6, 8, 6
; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
; LE-64BIT-NEXT: or 3, 7, 10
; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@ -1285,37 +1285,37 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
; LE-64BIT-NEXT: addi 8, 1, -32
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: stxvd2x 2, 7, 6
; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 2, 27, 28
; LE-64BIT-NEXT: rlwinm 4, 4, 5, 26, 26
; LE-64BIT-NEXT: neg 3, 3
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: li 6, 32
; LE-64BIT-NEXT: stxvd2x 0, 7, 8
; LE-64BIT-NEXT: xxlxor 0, 0, 0
; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
; LE-64BIT-NEXT: stxvd2x 1, 7, 6
; LE-64BIT-NEXT: stxvd2x 2, 0, 7
; LE-64BIT-NEXT: subfic 6, 4, 64
; LE-64BIT-NEXT: ldux 3, 8, 3
; LE-64BIT-NEXT: ld 7, 16(8)
; LE-64BIT-NEXT: ld 9, 24(8)
; LE-64BIT-NEXT: ld 8, 8(8)
; LE-64BIT-NEXT: srd 10, 7, 6
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: addi 6, 1, -32
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: stxvd2x 1, 7, 8
; LE-64BIT-NEXT: subfic 7, 4, 64
; LE-64BIT-NEXT: ldux 3, 6, 3
; LE-64BIT-NEXT: ld 8, 16(6)
; LE-64BIT-NEXT: ld 9, 24(6)
; LE-64BIT-NEXT: ld 6, 8(6)
; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
; LE-64BIT-NEXT: sld 7, 7, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 8, 6
; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 6, 7
; LE-64BIT-NEXT: srd 7, 3, 7
; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
; LE-64BIT-NEXT: or 6, 8, 6
; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
; LE-64BIT-NEXT: or 3, 7, 10
; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@ -1423,25 +1423,25 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: li 7, 48
; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: addi 4, 1, -64
; LE-64BIT-NEXT: stxvd2x 2, 4, 6
; LE-64BIT-NEXT: stxvd2x 2, 0, 4
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: stxvd2x 0, 4, 7
; LE-64BIT-NEXT: li 7, 32
; LE-64BIT-NEXT: neg 3, 3
; LE-64BIT-NEXT: stxvd2x 1, 4, 7
; LE-64BIT-NEXT: stxvd2x 2, 0, 4
; LE-64BIT-NEXT: extsw 3, 3
; LE-64BIT-NEXT: addi 4, 1, -32
; LE-64BIT-NEXT: lxvd2x 0, 4, 3
; LE-64BIT-NEXT: add 3, 4, 3
; LE-64BIT-NEXT: lxvd2x 1, 3, 6
; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: blr
;
; BE-LABEL: shl_32bytes_dwordOff:
@ -1541,17 +1541,17 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 0, 27, 28
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
; LE-64BIT-NEXT: rlwinm 3, 4, 3, 26, 28
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)
; LE-64BIT-NEXT: std 8, -32(1)
; LE-64BIT-NEXT: rlwinm 3, 4, 3, 26, 28
; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ldux 4, 9, 7
; LE-64BIT-NEXT: ld 7, 8(9)
; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ld 8, 16(9)
; LE-64BIT-NEXT: ld 9, 24(9)
; LE-64BIT-NEXT: srd 4, 4, 3
@ -1716,17 +1716,17 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 2, 27, 28
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
; LE-64BIT-NEXT: rlwinm 3, 4, 5, 26, 26
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)
; LE-64BIT-NEXT: std 8, -32(1)
; LE-64BIT-NEXT: rlwinm 3, 4, 5, 26, 26
; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ldux 4, 9, 7
; LE-64BIT-NEXT: ld 7, 8(9)
; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ld 8, 16(9)
; LE-64BIT-NEXT: ld 9, 24(9)
; LE-64BIT-NEXT: srd 4, 4, 3
@ -1848,16 +1848,16 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_32bytes_dwordOff:
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: lxvd2x 0, 0, 3
; LE-64BIT-NEXT: ld 6, 16(3)
; LE-64BIT-NEXT: ld 7, 24(3)
; LE-64BIT-NEXT: lxvd2x 0, 0, 3
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: addi 4, 1, -64
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: stxvd2x 0, 0, 4
; LE-64BIT-NEXT: std 6, -48(1)
; LE-64BIT-NEXT: sradi 6, 7, 63
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: std 7, -40(1)
; LE-64BIT-NEXT: stxvd2x 0, 0, 4
; LE-64BIT-NEXT: std 6, -8(1)
; LE-64BIT-NEXT: std 6, -16(1)
; LE-64BIT-NEXT: std 6, -24(1)
@ -1866,8 +1866,8 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; LE-64BIT-NEXT: add 3, 4, 3
; LE-64BIT-NEXT: li 4, 16
; LE-64BIT-NEXT: lxvd2x 1, 3, 4
; LE-64BIT-NEXT: stxvd2x 1, 5, 4
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
; LE-64BIT-NEXT: stxvd2x 1, 5, 4
; LE-64BIT-NEXT: blr
;
; BE-LABEL: ashr_32bytes_dwordOff:

View File

@ -435,12 +435,12 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 29, 27, 28
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: clrlwi 3, 3, 26
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: xori 8, 3, 63
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: ld 7, 16(4)
@ -605,37 +605,37 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
; LE-64BIT-NEXT: addi 8, 1, -32
; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: stxvd2x 2, 7, 6
; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 29, 27, 28
; LE-64BIT-NEXT: clrlwi 4, 4, 26
; LE-64BIT-NEXT: neg 3, 3
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: li 6, 32
; LE-64BIT-NEXT: stxvd2x 0, 7, 8
; LE-64BIT-NEXT: xxlxor 0, 0, 0
; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
; LE-64BIT-NEXT: stxvd2x 1, 7, 6
; LE-64BIT-NEXT: stxvd2x 2, 0, 7
; LE-64BIT-NEXT: subfic 6, 4, 64
; LE-64BIT-NEXT: ldux 3, 8, 3
; LE-64BIT-NEXT: ld 7, 16(8)
; LE-64BIT-NEXT: ld 9, 24(8)
; LE-64BIT-NEXT: ld 8, 8(8)
; LE-64BIT-NEXT: srd 10, 7, 6
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
; LE-64BIT-NEXT: addi 6, 1, -32
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: stxvd2x 1, 7, 8
; LE-64BIT-NEXT: subfic 7, 4, 64
; LE-64BIT-NEXT: ldux 3, 6, 3
; LE-64BIT-NEXT: ld 8, 16(6)
; LE-64BIT-NEXT: ld 9, 24(6)
; LE-64BIT-NEXT: ld 6, 8(6)
; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
; LE-64BIT-NEXT: sld 7, 7, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 8, 6
; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
; LE-64BIT-NEXT: or 9, 9, 10
; LE-64BIT-NEXT: srd 10, 6, 7
; LE-64BIT-NEXT: srd 7, 3, 7
; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
; LE-64BIT-NEXT: or 6, 8, 6
; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
; LE-64BIT-NEXT: or 3, 7, 10
; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@ -782,10 +782,10 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 29, 27, 28
; LE-64BIT-NEXT: clrlwi 4, 4, 26
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
; LE-64BIT-NEXT: clrlwi 4, 4, 26
; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)