
On PowerPC there are 128-bit VSX registers. These registers half overlap with the 64-bit floating point registers (FPRs). The 64-bit half of a VSX register that does not overlap with an FPR does not overlap with any other register class. The FPRs are the only subregisters of the VSX registers, but they do not fully cover the 128-bit super register. This leads to incorrect lane masks being created. This patch adds phony registers for the other half of the VSX registers in order to fully cover them and to make sure that the lane masks are not the same for the VSX and the floating point registers.
4208 lines
145 KiB
LLVM
4208 lines
145 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mattr=-paired-vector-memops -mcpu=pwr10 -mtriple=powerpc64le < %s | \
; RUN:   FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mattr=-paired-vector-memops -mcpu=pwr10 -mtriple=powerpc64 < %s | \
; RUN:   FileCheck %s --check-prefix=PWR10BE

; The invocations above compile this file four times: -mcpu=pwr9 and
; -mcpu=pwr10, each for the powerpc64le and powerpc64 triples. Every
; configuration is verified under its own FileCheck prefix. The pwr10
; invocations additionally pass -mattr=-paired-vector-memops.

;;
;; Vectors of f32
;;
|
|
; fadd reduction of <2 x float> with start value -0.0. The call carries no
; fast-math flags. The expected code converts both lanes to scalars with
; xscvspdpn and combines them with one scalar xsaddsp; the lane-extraction
; shuffles differ between the little- and big-endian configurations.
define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <2 x float> whose start value is the float argument %b.
; The call carries no fast-math flags. The expected code is a chain of two
; scalar xsaddsp instructions; the first one reads f1 before the function
; writes it (presumably the register in which %b arrives).
define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v2f32(float %b, <2 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <2 x float> with start value -0.0, where the call is
; marked `fast`. The expected code performs the addition as a vector xvaddsp
; on a splatted copy (xxspltw) and converts only the final lane to a scalar
; with xscvspdpn.
define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw vs0, v2, 2
; PWR9LE-NEXT: xvaddsp vs0, v2, vs0
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw vs0, v2, 1
; PWR9BE-NEXT: xvaddsp vs0, v2, vs0
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw vs0, v2, 2
; PWR10LE-NEXT: xvaddsp vs0, v2, vs0
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw vs0, v2, 1
; PWR10BE-NEXT: xvaddsp vs0, v2, vs0
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <4 x float> with start value -0.0. The call carries no
; fast-math flags. The expected code converts each of the four lanes of v2 to
; a scalar with xscvspdpn and accumulates them through three scalar xsaddsp
; instructions.
define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <4 x float> whose start value is the float argument %b.
; The call carries no fast-math flags. The expected code is a chain of four
; scalar xsaddsp instructions, one per lane of v2; the first one reads f1
; before the function writes it (presumably the register in which %b arrives).
define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v4f32(float %b, <4 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <4 x float> with start value -0.0, where the call is
; marked `fast`. The expected code uses two vector xvaddsp steps (after an
; xxswapd and an xxspltw shuffle) and converts only the final lane to a scalar
; with xscvspdpn.
define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: xvaddsp vs0, v2, v3
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: xvaddsp vs0, v2, v3
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: xvaddsp vs0, v2, v3
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: xvaddsp vs0, v2, v3
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <8 x float> with start value -0.0. The call carries no
; fast-math flags. The expected code reads the input from v2 and v3, converts
; each of the eight lanes to a scalar with xscvspdpn and accumulates them
; through a chain of scalar xsaddsp instructions (all lanes of v2 first, then
; all lanes of v3).
define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <8 x float> whose start value is the float argument %b.
; The call carries no fast-math flags. The expected code is a chain of eight
; scalar xsaddsp instructions over the lanes of v2 and then v3; the first one
; reads f1 before the function writes it (presumably the register in which %b
; arrives).
define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v8f32(float %b, <8 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <8 x float> with start value -0.0, where the call is
; marked `fast`. The expected code first adds v2 and v3 with a vector xvaddsp,
; then folds the result with two more xvaddsp steps (after xxswapd and
; xxspltw shuffles) and converts only the final lane to a scalar with
; xscvspdpn.
define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvaddsp vs0, v2, v3
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvaddsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvaddsp vs0, v2, v3
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvaddsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvaddsp vs0, v2, v3
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvaddsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvaddsp vs0, v2, v3
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvaddsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %a)
  ret float %0
}
|
|
|
|
; fadd reduction of <16 x float> with start value -0.0. The call carries no
; fast-math flags. The expected code reads the input from v2 through v5,
; converts each of the sixteen lanes to a scalar with xscvspdpn and
; accumulates them through a chain of scalar xsaddsp instructions, handling
; the registers in the order v2, v3, v4, v5.
define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %a)
  ret float %0
}
|
|
|
|
define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v16f32_b:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
|
|
; PWR9LE-NEXT: xscvspdpn f0, vs0
|
|
; PWR9LE-NEXT: xsaddsp f0, f1, f0
|
|
; PWR9LE-NEXT: xxswapd vs1, v2
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xscvspdpn f1, v2
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxswapd vs1, v3
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xscvspdpn f1, v3
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxswapd vs1, v4
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xscvspdpn f1, v4
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxswapd vs1, v5
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
|
|
; PWR9LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9LE-NEXT: xscvspdpn f1, v5
|
|
; PWR9LE-NEXT: xsaddsp f1, f0, f1
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v16f32_b:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: xscvspdpn f0, v2
|
|
; PWR9BE-NEXT: xsaddsp f0, f1, f0
|
|
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, v2
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xscvspdpn f1, v3
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, v3
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xscvspdpn f1, v4
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, v4
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xscvspdpn f1, v5
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, v5
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
|
|
; PWR9BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR9BE-NEXT: xsaddsp f1, f0, f1
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v16f32_b:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
|
|
; PWR10LE-NEXT: xscvspdpn f0, vs0
|
|
; PWR10LE-NEXT: xsaddsp f0, f1, f0
|
|
; PWR10LE-NEXT: xxswapd vs1, v2
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xscvspdpn f1, v2
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxswapd vs1, v3
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xscvspdpn f1, v3
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxswapd vs1, v4
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xscvspdpn f1, v4
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxswapd vs1, v5
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
|
|
; PWR10LE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10LE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10LE-NEXT: xscvspdpn f1, v5
|
|
; PWR10LE-NEXT: xsaddsp f1, f0, f1
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v16f32_b:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: xscvspdpn f0, v2
|
|
; PWR10BE-NEXT: xsaddsp f0, f1, f0
|
|
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, v2
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xscvspdpn f1, v3
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, v3
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xscvspdpn f1, v4
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, v4
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xscvspdpn f1, v5
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, v5
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f0, f0, f1
|
|
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
|
|
; PWR10BE-NEXT: xscvspdpn f1, vs1
|
|
; PWR10BE-NEXT: xsaddsp f1, f0, f1
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call float @llvm.vector.reduce.fadd.v16f32(float %b, <16 x float> %a)
|
|
ret float %0
|
|
}
|
|
|
|
; fadd reduction of <16 x float> with start value -0.0 and the `fast` flag on
; the call; the expected sequences add whole vectors with xvaddsp.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvaddsp vs0, v3, v5
; PWR9LE-NEXT: xvaddsp vs1, v2, v4
; PWR9LE-NEXT: xvaddsp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvaddsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvaddsp vs0, v3, v5
; PWR9BE-NEXT: xvaddsp vs1, v2, v4
; PWR9BE-NEXT: xvaddsp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvaddsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvaddsp vs0, v3, v5
; PWR10LE-NEXT: xvaddsp vs1, v2, v4
; PWR10LE-NEXT: xvaddsp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvaddsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvaddsp vs0, v3, v5
; PWR10BE-NEXT: xvaddsp vs1, v2, v4
; PWR10BE-NEXT: xvaddsp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvaddsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %sum = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %a)
  ret float %sum
}
|
|
|
|
; Declarations of the float-vector fadd reduction intrinsics (2, 4, 8 and 16 lanes).
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) #0
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) #0
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) #0
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) #0
|
|
|
|
;;
|
|
;; Vectors of f64
|
|
;;
|
|
; fadd reduction of <2 x double> with start value -0.0 and no fast-math flags;
; the expected sequences use a single scalar xsadddp after a lane swap.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xsadddp f1, v2, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xsadddp f1, v2, f0
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <2 x double> whose start value is the scalar argument %b
; (no fast-math flags); the expected sequences are chains of scalar xsadddp.
define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xsadddp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xsadddp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v2f64(double %b, <2 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <2 x double> with start value -0.0 and the `fast` flag on
; the call; the expected sequences use a vector xvadddp instead of scalar adds.
define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xvadddp vs0, v2, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xvadddp vs1, v2, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xvadddp vs0, v2, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xvadddp vs1, v2, vs0
; PWR10BE-NEXT: blr
entry:
  %sum = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <4 x double> with start value -0.0 and no fast-math flags;
; the expected sequences are chains of scalar xsadddp adds.
define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <4 x double> whose start value is the scalar argument %b
; (no fast-math flags); the expected sequences are chains of scalar xsadddp.
define dso_local double @v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v4f64(double %b, <4 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <4 x double> with start value -0.0 and the `fast` flag on
; the call; the expected sequences add whole vectors with xvadddp.
define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v2, v3
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v2, v3
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v2, v3
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v2, v3
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %sum = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <8 x double> with start value -0.0 and no fast-math flags;
; the expected sequences are chains of scalar xsadddp adds.
define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <8 x double> whose start value is the scalar argument %b
; (no fast-math flags); the expected sequences are chains of scalar xsadddp.
define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v8f64(double %b, <8 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <8 x double> with start value -0.0 and the `fast` flag on
; the call; the expected sequences add whole vectors with xvadddp.
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v3, v5
; PWR9LE-NEXT: xvadddp vs1, v2, v4
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v3, v5
; PWR9BE-NEXT: xvadddp vs1, v2, v4
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v3, v5
; PWR10LE-NEXT: xvadddp vs1, v2, v4
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v3, v5
; PWR10BE-NEXT: xvadddp vs1, v2, v4
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %sum = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <16 x double> with start value -0.0 and no fast-math flags;
; the expected sequences are chains of scalar xsadddp adds.
define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsadddp f0, f0, v5
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsadddp f0, f0, v6
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsadddp f0, f0, v7
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsadddp f0, f0, v8
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsadddp f0, f0, v6
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsadddp f0, f0, v7
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsadddp f0, f0, v8
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsadddp f0, f0, v9
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsadddp f0, f0, v5
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsadddp f0, f0, v6
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsadddp f0, f0, v7
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsadddp f0, f0, v8
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsadddp f0, f0, v6
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsadddp f0, f0, v7
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsadddp f0, f0, v8
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsadddp f0, f0, v9
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <16 x double> whose start value is the scalar argument %b
; (no fast-math flags); the expected sequences are chains of scalar xsadddp.
define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsadddp f0, f0, v5
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsadddp f0, f0, v6
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsadddp f0, f0, v7
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsadddp f0, f0, v8
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsadddp f0, f0, v6
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsadddp f0, f0, v7
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsadddp f0, f0, v8
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsadddp f0, f0, v9
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsadddp f0, f0, v5
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsadddp f0, f0, v6
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsadddp f0, f0, v7
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsadddp f0, f0, v8
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsadddp f0, f0, v6
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsadddp f0, f0, v7
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsadddp f0, f0, v8
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsadddp f0, f0, v9
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %sum = call double @llvm.vector.reduce.fadd.v16f64(double %b, <16 x double> %a)
  ret double %sum
}
|
|
|
|
; fadd reduction of <16 x double> with start value -0.0 and the `fast` flag on
; the call; the expected sequences add whole vectors with xvadddp.
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v4, v8
; PWR9LE-NEXT: xvadddp vs1, v2, v6
; PWR9LE-NEXT: xvadddp vs2, v5, v9
; PWR9LE-NEXT: xvadddp vs3, v3, v7
; PWR9LE-NEXT: xvadddp vs2, vs3, vs2
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v4, v8
; PWR9BE-NEXT: xvadddp vs1, v2, v6
; PWR9BE-NEXT: xvadddp vs2, v5, v9
; PWR9BE-NEXT: xvadddp vs3, v3, v7
; PWR9BE-NEXT: xvadddp vs2, vs3, vs2
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xvadddp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v4, v8
; PWR10LE-NEXT: xvadddp vs1, v2, v6
; PWR10LE-NEXT: xvadddp vs2, v5, v9
; PWR10LE-NEXT: xvadddp vs3, v3, v7
; PWR10LE-NEXT: xvadddp vs2, vs3, vs2
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v4, v8
; PWR10BE-NEXT: xvadddp vs1, v2, v6
; PWR10BE-NEXT: xvadddp vs2, v5, v9
; PWR10BE-NEXT: xvadddp vs3, v3, v7
; PWR10BE-NEXT: xvadddp vs2, vs3, vs2
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xvadddp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %sum = call fast double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> %a)
  ret double %sum
}
|
|
|
|
define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v32f64:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: xxswapd vs4, v2
|
|
; PWR9LE-NEXT: xxswapd vs5, v3
|
|
; PWR9LE-NEXT: lxv vs3, 224(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 240(r1)
|
|
; PWR9LE-NEXT: lxv vs1, 256(r1)
|
|
; PWR9LE-NEXT: lxv vs0, 272(r1)
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v2
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v4
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v3
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v5
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v4
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v6
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v5
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v7
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v6
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v8
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v7
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v9
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v8
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v10
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v9
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v11
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v10
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v12
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v11
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v13
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v12
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, vs3
|
|
; PWR9LE-NEXT: xsadddp f4, f4, v13
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xsadddp f3, f4, f3
|
|
; PWR9LE-NEXT: xxswapd vs4, vs2
|
|
; PWR9LE-NEXT: xsadddp f3, f3, f4
|
|
; PWR9LE-NEXT: xsadddp f2, f3, f2
|
|
; PWR9LE-NEXT: xxswapd vs3, vs1
|
|
; PWR9LE-NEXT: xsadddp f2, f2, f3
|
|
; PWR9LE-NEXT: xsadddp f1, f2, f1
|
|
; PWR9LE-NEXT: xxswapd vs2, vs0
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v32f64:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: xxswapd vs4, v2
|
|
; PWR9BE-NEXT: xxswapd vs5, v3
|
|
; PWR9BE-NEXT: lxv vs3, 240(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 256(r1)
|
|
; PWR9BE-NEXT: lxv vs1, 272(r1)
|
|
; PWR9BE-NEXT: lxv vs0, 288(r1)
|
|
; PWR9BE-NEXT: xsadddp f4, v2, f4
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v3
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v4
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v4
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v5
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v5
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v6
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v6
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v7
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v7
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v8
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v8
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v9
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v9
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v10
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v10
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v11
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v11
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v12
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v12
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v13
|
|
; PWR9BE-NEXT: xsadddp f4, f4, v13
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f3
|
|
; PWR9BE-NEXT: xxswapd vs3, vs3
|
|
; PWR9BE-NEXT: xsadddp f3, f4, f3
|
|
; PWR9BE-NEXT: xsadddp f3, f3, f2
|
|
; PWR9BE-NEXT: xxswapd vs2, vs2
|
|
; PWR9BE-NEXT: xsadddp f2, f3, f2
|
|
; PWR9BE-NEXT: xsadddp f2, f2, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, vs1
|
|
; PWR9BE-NEXT: xsadddp f1, f2, f1
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: xxswapd vs0, vs0
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v32f64:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: xxswapd vs4, v2
|
|
; PWR10LE-NEXT: xxswapd vs5, v3
|
|
; PWR10LE-NEXT: lxv vs3, 224(r1)
|
|
; PWR10LE-NEXT: lxv vs2, 240(r1)
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v2
|
|
; PWR10LE-NEXT: lxv vs1, 256(r1)
|
|
; PWR10LE-NEXT: lxv vs0, 272(r1)
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v4
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v3
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v5
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v4
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v6
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v5
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v7
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v6
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v8
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v7
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v9
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v8
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v10
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v9
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v11
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v10
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v12
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v11
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v13
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v12
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, vs3
|
|
; PWR10LE-NEXT: xsadddp f4, f4, v13
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xsadddp f3, f4, f3
|
|
; PWR10LE-NEXT: xxswapd vs4, vs2
|
|
; PWR10LE-NEXT: xsadddp f3, f3, f4
|
|
; PWR10LE-NEXT: xsadddp f2, f3, f2
|
|
; PWR10LE-NEXT: xxswapd vs3, vs1
|
|
; PWR10LE-NEXT: xsadddp f2, f2, f3
|
|
; PWR10LE-NEXT: xsadddp f1, f2, f1
|
|
; PWR10LE-NEXT: xxswapd vs2, vs0
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v32f64:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: xxswapd vs4, v2
|
|
; PWR10BE-NEXT: xxswapd vs5, v3
|
|
; PWR10BE-NEXT: lxv vs3, 240(r1)
|
|
; PWR10BE-NEXT: lxv vs2, 256(r1)
|
|
; PWR10BE-NEXT: xsadddp f4, v2, f4
|
|
; PWR10BE-NEXT: lxv vs1, 272(r1)
|
|
; PWR10BE-NEXT: lxv vs0, 288(r1)
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v3
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v4
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v4
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v5
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v5
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v6
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v6
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v7
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v7
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v8
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v8
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v9
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v9
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v10
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v10
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v11
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v11
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v12
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v12
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v13
|
|
; PWR10BE-NEXT: xsadddp f4, f4, v13
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f3
|
|
; PWR10BE-NEXT: xxswapd vs3, vs3
|
|
; PWR10BE-NEXT: xsadddp f3, f4, f3
|
|
; PWR10BE-NEXT: xsadddp f3, f3, f2
|
|
; PWR10BE-NEXT: xxswapd vs2, vs2
|
|
; PWR10BE-NEXT: xsadddp f2, f3, f2
|
|
; PWR10BE-NEXT: xsadddp f2, f2, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, vs1
|
|
; PWR10BE-NEXT: xsadddp f1, f2, f1
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: xxswapd vs0, vs0
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call double @llvm.vector.reduce.fadd.v32f64(double -0.000000e+00, <32 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
;; v32f64_b: fadd reduction of the <32 x double> argument %a, seeded with the
;; scalar start value %b. The call below carries no fast-math flags, and the
;; expected code for every run configuration is one serial chain of scalar
;; xsadddp instructions accumulating in f1 (presumably where %b arrives --
;; confirm against the ABI), with an xxswapd per vector to expose its other
;; doubleword. Twelve of the sixteen vectors are read from v2-v13; the other
;; four are loaded with lxv from r1-relative offsets (224..272 in the
;; little-endian checks, 240..288 in the big-endian checks).
;; The assertions are autogenerated (see the NOTE at the top of the file):
;; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define dso_local double @v32f64_b(<32 x double> %a, double %b) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v32f64_b:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: xxswapd vs5, v2
|
|
; PWR9LE-NEXT: lxv vs4, 224(r1)
|
|
; PWR9LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 256(r1)
|
|
; PWR9LE-NEXT: lxv vs0, 272(r1)
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v3
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v2
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v4
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v3
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v5
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v4
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v6
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v5
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v7
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v6
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v8
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v7
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v9
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v8
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v10
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v9
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v11
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v10
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v12
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v11
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, v13
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v12
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xxswapd vs5, vs4
|
|
; PWR9LE-NEXT: xsadddp f1, f1, v13
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f4
|
|
; PWR9LE-NEXT: xxswapd vs4, vs3
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f4
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f3
|
|
; PWR9LE-NEXT: xxswapd vs3, vs2
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f3
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9LE-NEXT: xxswapd vs2, vs0
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v32f64_b:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v2
|
|
; PWR9BE-NEXT: xxswapd vs5, v2
|
|
; PWR9BE-NEXT: lxv vs4, 240(r1)
|
|
; PWR9BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 272(r1)
|
|
; PWR9BE-NEXT: lxv vs0, 288(r1)
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v3
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v3
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v4
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v4
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v5
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v5
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v6
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v6
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v7
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v7
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v8
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v8
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v9
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v9
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v10
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v10
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v11
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v11
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v12
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v12
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, v13
|
|
; PWR9BE-NEXT: xsadddp f1, f1, v13
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f4
|
|
; PWR9BE-NEXT: xxswapd vs4, vs4
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f4
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f3
|
|
; PWR9BE-NEXT: xxswapd vs3, vs3
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f3
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9BE-NEXT: xxswapd vs2, vs2
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: xxswapd vs0, vs0
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v32f64_b:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: xxswapd vs5, v2
|
|
; PWR10LE-NEXT: lxv vs4, 224(r1)
|
|
; PWR10LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v3
|
|
; PWR10LE-NEXT: lxv vs2, 256(r1)
|
|
; PWR10LE-NEXT: lxv vs0, 272(r1)
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v2
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v4
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v3
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v5
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v4
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v6
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v5
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v7
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v6
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v8
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v7
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v9
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v8
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v10
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v9
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v11
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v10
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v12
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v11
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, v13
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v12
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xxswapd vs5, vs4
|
|
; PWR10LE-NEXT: xsadddp f1, f1, v13
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f4
|
|
; PWR10LE-NEXT: xxswapd vs4, vs3
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f4
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f3
|
|
; PWR10LE-NEXT: xxswapd vs3, vs2
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f3
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10LE-NEXT: xxswapd vs2, vs0
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v32f64_b:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v2
|
|
; PWR10BE-NEXT: xxswapd vs5, v2
|
|
; PWR10BE-NEXT: lxv vs4, 240(r1)
|
|
; PWR10BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v3
|
|
; PWR10BE-NEXT: lxv vs2, 272(r1)
|
|
; PWR10BE-NEXT: lxv vs0, 288(r1)
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v3
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v4
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v4
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v5
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v5
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v6
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v6
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v7
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v7
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v8
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v8
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v9
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v9
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v10
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v10
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v11
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v11
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v12
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v12
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, v13
|
|
; PWR10BE-NEXT: xsadddp f1, f1, v13
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f5
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f4
|
|
; PWR10BE-NEXT: xxswapd vs4, vs4
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f4
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f3
|
|
; PWR10BE-NEXT: xxswapd vs3, vs3
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f3
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10BE-NEXT: xxswapd vs2, vs2
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: xxswapd vs0, vs0
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call double @llvm.vector.reduce.fadd.v32f64(double %b, <32 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
;; v32f64_fast: fadd reduction of the <32 x double> argument %a where the call
;; is marked `fast` and the start value is the constant -0.0. The expected
;; code contains no scalar xsadddp at all: whole vectors are combined with
;; xvadddp (twelve operands from v2-v13, four loaded with lxv from r1-relative
;; offsets), and the last vector is folded with one xxswapd + xvadddp.
;; The little-endian checks end with an extra xxswapd into vs1, whereas the
;; big-endian checks write the final xvadddp directly to vs1.
;; The assertions are autogenerated (see the NOTE at the top of the file):
;; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v32f64_fast:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: lxv vs0, 256(r1)
|
|
; PWR9LE-NEXT: lxv vs1, 224(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 272(r1)
|
|
; PWR9LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR9LE-NEXT: xvadddp vs4, v3, v11
|
|
; PWR9LE-NEXT: xvadddp vs5, v5, v13
|
|
; PWR9LE-NEXT: xvadddp vs6, v2, v10
|
|
; PWR9LE-NEXT: xvadddp vs7, v4, v12
|
|
; PWR9LE-NEXT: xvadddp vs3, v7, vs3
|
|
; PWR9LE-NEXT: xvadddp vs2, v9, vs2
|
|
; PWR9LE-NEXT: xvadddp vs1, v6, vs1
|
|
; PWR9LE-NEXT: xvadddp vs0, v8, vs0
|
|
; PWR9LE-NEXT: xvadddp vs0, vs7, vs0
|
|
; PWR9LE-NEXT: xvadddp vs1, vs6, vs1
|
|
; PWR9LE-NEXT: xvadddp vs2, vs5, vs2
|
|
; PWR9LE-NEXT: xvadddp vs3, vs4, vs3
|
|
; PWR9LE-NEXT: xvadddp vs2, vs3, vs2
|
|
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
|
|
; PWR9LE-NEXT: xvadddp vs0, vs0, vs2
|
|
; PWR9LE-NEXT: xxswapd vs1, vs0
|
|
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
|
|
; PWR9LE-NEXT: xxswapd vs1, vs0
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v32f64_fast:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: lxv vs0, 272(r1)
|
|
; PWR9BE-NEXT: lxv vs1, 240(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 288(r1)
|
|
; PWR9BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR9BE-NEXT: xvadddp vs4, v3, v11
|
|
; PWR9BE-NEXT: xvadddp vs5, v5, v13
|
|
; PWR9BE-NEXT: xvadddp vs6, v2, v10
|
|
; PWR9BE-NEXT: xvadddp vs7, v4, v12
|
|
; PWR9BE-NEXT: xvadddp vs3, v7, vs3
|
|
; PWR9BE-NEXT: xvadddp vs2, v9, vs2
|
|
; PWR9BE-NEXT: xvadddp vs1, v6, vs1
|
|
; PWR9BE-NEXT: xvadddp vs0, v8, vs0
|
|
; PWR9BE-NEXT: xvadddp vs0, vs7, vs0
|
|
; PWR9BE-NEXT: xvadddp vs1, vs6, vs1
|
|
; PWR9BE-NEXT: xvadddp vs2, vs5, vs2
|
|
; PWR9BE-NEXT: xvadddp vs3, vs4, vs3
|
|
; PWR9BE-NEXT: xvadddp vs2, vs3, vs2
|
|
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
|
|
; PWR9BE-NEXT: xvadddp vs0, vs0, vs2
|
|
; PWR9BE-NEXT: xxswapd vs1, vs0
|
|
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v32f64_fast:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: lxv vs0, 256(r1)
|
|
; PWR10LE-NEXT: lxv vs1, 224(r1)
|
|
; PWR10LE-NEXT: xvadddp vs4, v3, v11
|
|
; PWR10LE-NEXT: xvadddp vs5, v5, v13
|
|
; PWR10LE-NEXT: xvadddp vs6, v2, v10
|
|
; PWR10LE-NEXT: xvadddp vs7, v4, v12
|
|
; PWR10LE-NEXT: xvadddp vs1, v6, vs1
|
|
; PWR10LE-NEXT: lxv vs2, 272(r1)
|
|
; PWR10LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR10LE-NEXT: xvadddp vs3, v7, vs3
|
|
; PWR10LE-NEXT: xvadddp vs2, v9, vs2
|
|
; PWR10LE-NEXT: xvadddp vs0, v8, vs0
|
|
; PWR10LE-NEXT: xvadddp vs0, vs7, vs0
|
|
; PWR10LE-NEXT: xvadddp vs1, vs6, vs1
|
|
; PWR10LE-NEXT: xvadddp vs2, vs5, vs2
|
|
; PWR10LE-NEXT: xvadddp vs3, vs4, vs3
|
|
; PWR10LE-NEXT: xvadddp vs2, vs3, vs2
|
|
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
|
|
; PWR10LE-NEXT: xvadddp vs0, vs0, vs2
|
|
; PWR10LE-NEXT: xxswapd vs1, vs0
|
|
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
|
|
; PWR10LE-NEXT: xxswapd vs1, vs0
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v32f64_fast:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: lxv vs0, 272(r1)
|
|
; PWR10BE-NEXT: lxv vs1, 240(r1)
|
|
; PWR10BE-NEXT: xvadddp vs4, v3, v11
|
|
; PWR10BE-NEXT: xvadddp vs5, v5, v13
|
|
; PWR10BE-NEXT: xvadddp vs6, v2, v10
|
|
; PWR10BE-NEXT: xvadddp vs7, v4, v12
|
|
; PWR10BE-NEXT: xvadddp vs1, v6, vs1
|
|
; PWR10BE-NEXT: lxv vs2, 288(r1)
|
|
; PWR10BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR10BE-NEXT: xvadddp vs3, v7, vs3
|
|
; PWR10BE-NEXT: xvadddp vs2, v9, vs2
|
|
; PWR10BE-NEXT: xvadddp vs0, v8, vs0
|
|
; PWR10BE-NEXT: xvadddp vs0, vs7, vs0
|
|
; PWR10BE-NEXT: xvadddp vs1, vs6, vs1
|
|
; PWR10BE-NEXT: xvadddp vs2, vs5, vs2
|
|
; PWR10BE-NEXT: xvadddp vs3, vs4, vs3
|
|
; PWR10BE-NEXT: xvadddp vs2, vs3, vs2
|
|
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
|
|
; PWR10BE-NEXT: xvadddp vs0, vs0, vs2
|
|
; PWR10BE-NEXT: xxswapd vs1, vs0
|
|
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call fast double @llvm.vector.reduce.fadd.v32f64(double -0.000000e+00, <32 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
define dso_local double @v64f64(<64 x double> %a) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v64f64:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: xxswapd v18, v2
|
|
; PWR9LE-NEXT: lxv v17, 224(r1)
|
|
; PWR9LE-NEXT: lxv v16, 240(r1)
|
|
; PWR9LE-NEXT: lxv v15, 256(r1)
|
|
; PWR9LE-NEXT: lxv v14, 272(r1)
|
|
; PWR9LE-NEXT: xsadddp v2, v18, v2
|
|
; PWR9LE-NEXT: xxswapd v18, v3
|
|
; PWR9LE-NEXT: lxv v1, 288(r1)
|
|
; PWR9LE-NEXT: lxv v0, 304(r1)
|
|
; PWR9LE-NEXT: lxv vs13, 320(r1)
|
|
; PWR9LE-NEXT: lxv vs12, 336(r1)
|
|
; PWR9LE-NEXT: lxv vs11, 352(r1)
|
|
; PWR9LE-NEXT: lxv vs10, 368(r1)
|
|
; PWR9LE-NEXT: lxv vs9, 384(r1)
|
|
; PWR9LE-NEXT: lxv vs8, 400(r1)
|
|
; PWR9LE-NEXT: lxv vs7, 416(r1)
|
|
; PWR9LE-NEXT: lxv vs6, 432(r1)
|
|
; PWR9LE-NEXT: lxv vs5, 448(r1)
|
|
; PWR9LE-NEXT: lxv vs4, 464(r1)
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v18
|
|
; PWR9LE-NEXT: lxv vs3, 480(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 496(r1)
|
|
; PWR9LE-NEXT: lxv vs1, 512(r1)
|
|
; PWR9LE-NEXT: lxv vs0, 528(r1)
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v4
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v5
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v4
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v6
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v5
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v7
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v6
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v8
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v7
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v9
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v8
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v10
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v9
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v11
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v10
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v12
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v11
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v13
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v12
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v17
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v13
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v16
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v17
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v15
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v16
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v14
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v15
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v1
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v14
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, v0
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v1
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xxswapd v3, vs13
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v0
|
|
; PWR9LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9LE-NEXT: xsadddp f13, v2, f13
|
|
; PWR9LE-NEXT: xxswapd v2, vs12
|
|
; PWR9LE-NEXT: xsadddp f13, f13, v2
|
|
; PWR9LE-NEXT: xsadddp f12, f13, f12
|
|
; PWR9LE-NEXT: xxswapd vs13, vs11
|
|
; PWR9LE-NEXT: xsadddp f12, f12, f13
|
|
; PWR9LE-NEXT: xsadddp f11, f12, f11
|
|
; PWR9LE-NEXT: xxswapd vs12, vs10
|
|
; PWR9LE-NEXT: xsadddp f11, f11, f12
|
|
; PWR9LE-NEXT: xsadddp f10, f11, f10
|
|
; PWR9LE-NEXT: xxswapd vs11, vs9
|
|
; PWR9LE-NEXT: xsadddp f10, f10, f11
|
|
; PWR9LE-NEXT: xsadddp f9, f10, f9
|
|
; PWR9LE-NEXT: xxswapd vs10, vs8
|
|
; PWR9LE-NEXT: xsadddp f9, f9, f10
|
|
; PWR9LE-NEXT: xsadddp f8, f9, f8
|
|
; PWR9LE-NEXT: xxswapd vs9, vs7
|
|
; PWR9LE-NEXT: xsadddp f8, f8, f9
|
|
; PWR9LE-NEXT: xsadddp f7, f8, f7
|
|
; PWR9LE-NEXT: xxswapd vs8, vs6
|
|
; PWR9LE-NEXT: xsadddp f7, f7, f8
|
|
; PWR9LE-NEXT: xsadddp f6, f7, f6
|
|
; PWR9LE-NEXT: xxswapd vs7, vs5
|
|
; PWR9LE-NEXT: xsadddp f6, f6, f7
|
|
; PWR9LE-NEXT: xsadddp f5, f6, f5
|
|
; PWR9LE-NEXT: xxswapd vs6, vs4
|
|
; PWR9LE-NEXT: xsadddp f5, f5, f6
|
|
; PWR9LE-NEXT: xsadddp f4, f5, f4
|
|
; PWR9LE-NEXT: xxswapd vs5, vs3
|
|
; PWR9LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR9LE-NEXT: xsadddp f3, f4, f3
|
|
; PWR9LE-NEXT: xxswapd vs4, vs2
|
|
; PWR9LE-NEXT: xsadddp f3, f3, f4
|
|
; PWR9LE-NEXT: xsadddp f2, f3, f2
|
|
; PWR9LE-NEXT: xxswapd vs3, vs1
|
|
; PWR9LE-NEXT: xsadddp f2, f2, f3
|
|
; PWR9LE-NEXT: xsadddp f1, f2, f1
|
|
; PWR9LE-NEXT: xxswapd vs2, vs0
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR9LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v64f64:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: xxswapd v18, v2
|
|
; PWR9BE-NEXT: lxv v17, 240(r1)
|
|
; PWR9BE-NEXT: lxv v16, 256(r1)
|
|
; PWR9BE-NEXT: lxv v15, 272(r1)
|
|
; PWR9BE-NEXT: lxv v14, 288(r1)
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v18
|
|
; PWR9BE-NEXT: lxv v1, 304(r1)
|
|
; PWR9BE-NEXT: lxv v0, 320(r1)
|
|
; PWR9BE-NEXT: lxv vs13, 336(r1)
|
|
; PWR9BE-NEXT: lxv vs12, 352(r1)
|
|
; PWR9BE-NEXT: lxv vs11, 368(r1)
|
|
; PWR9BE-NEXT: lxv vs10, 384(r1)
|
|
; PWR9BE-NEXT: lxv vs9, 400(r1)
|
|
; PWR9BE-NEXT: lxv vs8, 416(r1)
|
|
; PWR9BE-NEXT: lxv vs7, 432(r1)
|
|
; PWR9BE-NEXT: lxv vs6, 448(r1)
|
|
; PWR9BE-NEXT: lxv vs5, 464(r1)
|
|
; PWR9BE-NEXT: lxv vs4, 480(r1)
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v3
|
|
; PWR9BE-NEXT: lxv vs3, 496(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 512(r1)
|
|
; PWR9BE-NEXT: lxv vs1, 528(r1)
|
|
; PWR9BE-NEXT: lxv vs0, 544(r1)
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v4
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v4
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v5
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v5
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v6
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v6
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v7
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v7
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v8
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v8
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v9
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v9
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v10
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v10
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v11
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v11
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v12
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v12
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v13
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v13
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v17
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v17
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v16
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v16
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v15
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v15
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v14
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v14
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v1
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v1
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xxswapd v3, v0
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v0
|
|
; PWR9BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR9BE-NEXT: xsadddp v2, v2, f13
|
|
; PWR9BE-NEXT: xxswapd vs13, vs13
|
|
; PWR9BE-NEXT: xsadddp f13, v2, f13
|
|
; PWR9BE-NEXT: xsadddp f13, f13, f12
|
|
; PWR9BE-NEXT: xxswapd vs12, vs12
|
|
; PWR9BE-NEXT: xsadddp f12, f13, f12
|
|
; PWR9BE-NEXT: xsadddp f12, f12, f11
|
|
; PWR9BE-NEXT: xxswapd vs11, vs11
|
|
; PWR9BE-NEXT: xsadddp f11, f12, f11
|
|
; PWR9BE-NEXT: xsadddp f11, f11, f10
|
|
; PWR9BE-NEXT: xxswapd vs10, vs10
|
|
; PWR9BE-NEXT: xsadddp f10, f11, f10
|
|
; PWR9BE-NEXT: xsadddp f10, f10, f9
|
|
; PWR9BE-NEXT: xxswapd vs9, vs9
|
|
; PWR9BE-NEXT: xsadddp f9, f10, f9
|
|
; PWR9BE-NEXT: xsadddp f9, f9, f8
|
|
; PWR9BE-NEXT: xxswapd vs8, vs8
|
|
; PWR9BE-NEXT: xsadddp f8, f9, f8
|
|
; PWR9BE-NEXT: xsadddp f8, f8, f7
|
|
; PWR9BE-NEXT: xxswapd vs7, vs7
|
|
; PWR9BE-NEXT: xsadddp f7, f8, f7
|
|
; PWR9BE-NEXT: xsadddp f7, f7, f6
|
|
; PWR9BE-NEXT: xxswapd vs6, vs6
|
|
; PWR9BE-NEXT: xsadddp f6, f7, f6
|
|
; PWR9BE-NEXT: xsadddp f6, f6, f5
|
|
; PWR9BE-NEXT: xxswapd vs5, vs5
|
|
; PWR9BE-NEXT: xsadddp f5, f6, f5
|
|
; PWR9BE-NEXT: xsadddp f5, f5, f4
|
|
; PWR9BE-NEXT: xxswapd vs4, vs4
|
|
; PWR9BE-NEXT: xsadddp f4, f5, f4
|
|
; PWR9BE-NEXT: xsadddp f4, f4, f3
|
|
; PWR9BE-NEXT: xxswapd vs3, vs3
|
|
; PWR9BE-NEXT: xsadddp f3, f4, f3
|
|
; PWR9BE-NEXT: xsadddp f3, f3, f2
|
|
; PWR9BE-NEXT: xxswapd vs2, vs2
|
|
; PWR9BE-NEXT: xsadddp f2, f3, f2
|
|
; PWR9BE-NEXT: xsadddp f2, f2, f1
|
|
; PWR9BE-NEXT: xxswapd vs1, vs1
|
|
; PWR9BE-NEXT: xsadddp f1, f2, f1
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: xxswapd vs0, vs0
|
|
; PWR9BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v64f64:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: xxswapd v18, v2
|
|
; PWR10LE-NEXT: lxv v17, 224(r1)
|
|
; PWR10LE-NEXT: lxv v16, 240(r1)
|
|
; PWR10LE-NEXT: xsadddp v2, v18, v2
|
|
; PWR10LE-NEXT: xxswapd v18, v3
|
|
; PWR10LE-NEXT: lxv v15, 256(r1)
|
|
; PWR10LE-NEXT: lxv v14, 272(r1)
|
|
; PWR10LE-NEXT: lxv v1, 288(r1)
|
|
; PWR10LE-NEXT: lxv v0, 304(r1)
|
|
; PWR10LE-NEXT: lxv vs13, 320(r1)
|
|
; PWR10LE-NEXT: lxv vs12, 336(r1)
|
|
; PWR10LE-NEXT: lxv vs11, 352(r1)
|
|
; PWR10LE-NEXT: lxv vs10, 368(r1)
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v18
|
|
; PWR10LE-NEXT: lxv vs9, 384(r1)
|
|
; PWR10LE-NEXT: lxv vs8, 400(r1)
|
|
; PWR10LE-NEXT: lxv vs7, 416(r1)
|
|
; PWR10LE-NEXT: lxv vs6, 432(r1)
|
|
; PWR10LE-NEXT: lxv vs5, 448(r1)
|
|
; PWR10LE-NEXT: lxv vs4, 464(r1)
|
|
; PWR10LE-NEXT: lxv vs3, 480(r1)
|
|
; PWR10LE-NEXT: lxv vs2, 496(r1)
|
|
; PWR10LE-NEXT: lxv vs1, 512(r1)
|
|
; PWR10LE-NEXT: lxv vs0, 528(r1)
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v4
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v5
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v4
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v6
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v5
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v7
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v6
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v8
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v7
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v9
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v8
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v10
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v9
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v11
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v10
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v12
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v11
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v13
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v12
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v17
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v13
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v16
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v17
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v15
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v16
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v14
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v15
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v1
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v14
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, v0
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v1
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xxswapd v3, vs13
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v0
|
|
; PWR10LE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10LE-NEXT: xsadddp f13, v2, f13
|
|
; PWR10LE-NEXT: xxswapd v2, vs12
|
|
; PWR10LE-NEXT: xsadddp f13, f13, v2
|
|
; PWR10LE-NEXT: xsadddp f12, f13, f12
|
|
; PWR10LE-NEXT: xxswapd vs13, vs11
|
|
; PWR10LE-NEXT: xsadddp f12, f12, f13
|
|
; PWR10LE-NEXT: xsadddp f11, f12, f11
|
|
; PWR10LE-NEXT: xxswapd vs12, vs10
|
|
; PWR10LE-NEXT: xsadddp f11, f11, f12
|
|
; PWR10LE-NEXT: xsadddp f10, f11, f10
|
|
; PWR10LE-NEXT: xxswapd vs11, vs9
|
|
; PWR10LE-NEXT: xsadddp f10, f10, f11
|
|
; PWR10LE-NEXT: xsadddp f9, f10, f9
|
|
; PWR10LE-NEXT: xxswapd vs10, vs8
|
|
; PWR10LE-NEXT: xsadddp f9, f9, f10
|
|
; PWR10LE-NEXT: xsadddp f8, f9, f8
|
|
; PWR10LE-NEXT: xxswapd vs9, vs7
|
|
; PWR10LE-NEXT: xsadddp f8, f8, f9
|
|
; PWR10LE-NEXT: xsadddp f7, f8, f7
|
|
; PWR10LE-NEXT: xxswapd vs8, vs6
|
|
; PWR10LE-NEXT: xsadddp f7, f7, f8
|
|
; PWR10LE-NEXT: xsadddp f6, f7, f6
|
|
; PWR10LE-NEXT: xxswapd vs7, vs5
|
|
; PWR10LE-NEXT: xsadddp f6, f6, f7
|
|
; PWR10LE-NEXT: xsadddp f5, f6, f5
|
|
; PWR10LE-NEXT: xxswapd vs6, vs4
|
|
; PWR10LE-NEXT: xsadddp f5, f5, f6
|
|
; PWR10LE-NEXT: xsadddp f4, f5, f4
|
|
; PWR10LE-NEXT: xxswapd vs5, vs3
|
|
; PWR10LE-NEXT: xsadddp f4, f4, f5
|
|
; PWR10LE-NEXT: xsadddp f3, f4, f3
|
|
; PWR10LE-NEXT: xxswapd vs4, vs2
|
|
; PWR10LE-NEXT: xsadddp f3, f3, f4
|
|
; PWR10LE-NEXT: xsadddp f2, f3, f2
|
|
; PWR10LE-NEXT: xxswapd vs3, vs1
|
|
; PWR10LE-NEXT: xsadddp f2, f2, f3
|
|
; PWR10LE-NEXT: xsadddp f1, f2, f1
|
|
; PWR10LE-NEXT: xxswapd vs2, vs0
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f2
|
|
; PWR10LE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v64f64:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: xxswapd v18, v2
|
|
; PWR10BE-NEXT: lxv v17, 240(r1)
|
|
; PWR10BE-NEXT: lxv v16, 256(r1)
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v18
|
|
; PWR10BE-NEXT: lxv v15, 272(r1)
|
|
; PWR10BE-NEXT: lxv v14, 288(r1)
|
|
; PWR10BE-NEXT: lxv v1, 304(r1)
|
|
; PWR10BE-NEXT: lxv v0, 320(r1)
|
|
; PWR10BE-NEXT: lxv vs13, 336(r1)
|
|
; PWR10BE-NEXT: lxv vs12, 352(r1)
|
|
; PWR10BE-NEXT: lxv vs11, 368(r1)
|
|
; PWR10BE-NEXT: lxv vs10, 384(r1)
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v3
|
|
; PWR10BE-NEXT: lxv vs9, 400(r1)
|
|
; PWR10BE-NEXT: lxv vs8, 416(r1)
|
|
; PWR10BE-NEXT: lxv vs7, 432(r1)
|
|
; PWR10BE-NEXT: lxv vs6, 448(r1)
|
|
; PWR10BE-NEXT: lxv vs5, 464(r1)
|
|
; PWR10BE-NEXT: lxv vs4, 480(r1)
|
|
; PWR10BE-NEXT: lxv vs3, 496(r1)
|
|
; PWR10BE-NEXT: lxv vs2, 512(r1)
|
|
; PWR10BE-NEXT: lxv vs1, 528(r1)
|
|
; PWR10BE-NEXT: lxv vs0, 544(r1)
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v4
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v4
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v5
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v5
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v6
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v6
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v7
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v7
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v8
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v8
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v9
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v9
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v10
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v10
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v11
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v11
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v12
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v12
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v13
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v13
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v17
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v17
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v16
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v16
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v15
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v15
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v14
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v14
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v1
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v1
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xxswapd v3, v0
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v0
|
|
; PWR10BE-NEXT: xsadddp v2, v2, v3
|
|
; PWR10BE-NEXT: xsadddp v2, v2, f13
|
|
; PWR10BE-NEXT: xxswapd vs13, vs13
|
|
; PWR10BE-NEXT: xsadddp f13, v2, f13
|
|
; PWR10BE-NEXT: xsadddp f13, f13, f12
|
|
; PWR10BE-NEXT: xxswapd vs12, vs12
|
|
; PWR10BE-NEXT: xsadddp f12, f13, f12
|
|
; PWR10BE-NEXT: xsadddp f12, f12, f11
|
|
; PWR10BE-NEXT: xxswapd vs11, vs11
|
|
; PWR10BE-NEXT: xsadddp f11, f12, f11
|
|
; PWR10BE-NEXT: xsadddp f11, f11, f10
|
|
; PWR10BE-NEXT: xxswapd vs10, vs10
|
|
; PWR10BE-NEXT: xsadddp f10, f11, f10
|
|
; PWR10BE-NEXT: xsadddp f10, f10, f9
|
|
; PWR10BE-NEXT: xxswapd vs9, vs9
|
|
; PWR10BE-NEXT: xsadddp f9, f10, f9
|
|
; PWR10BE-NEXT: xsadddp f9, f9, f8
|
|
; PWR10BE-NEXT: xxswapd vs8, vs8
|
|
; PWR10BE-NEXT: xsadddp f8, f9, f8
|
|
; PWR10BE-NEXT: xsadddp f8, f8, f7
|
|
; PWR10BE-NEXT: xxswapd vs7, vs7
|
|
; PWR10BE-NEXT: xsadddp f7, f8, f7
|
|
; PWR10BE-NEXT: xsadddp f7, f7, f6
|
|
; PWR10BE-NEXT: xxswapd vs6, vs6
|
|
; PWR10BE-NEXT: xsadddp f6, f7, f6
|
|
; PWR10BE-NEXT: xsadddp f6, f6, f5
|
|
; PWR10BE-NEXT: xxswapd vs5, vs5
|
|
; PWR10BE-NEXT: xsadddp f5, f6, f5
|
|
; PWR10BE-NEXT: xsadddp f5, f5, f4
|
|
; PWR10BE-NEXT: xxswapd vs4, vs4
|
|
; PWR10BE-NEXT: xsadddp f4, f5, f4
|
|
; PWR10BE-NEXT: xsadddp f4, f4, f3
|
|
; PWR10BE-NEXT: xxswapd vs3, vs3
|
|
; PWR10BE-NEXT: xsadddp f3, f4, f3
|
|
; PWR10BE-NEXT: xsadddp f3, f3, f2
|
|
; PWR10BE-NEXT: xxswapd vs2, vs2
|
|
; PWR10BE-NEXT: xsadddp f2, f3, f2
|
|
; PWR10BE-NEXT: xsadddp f2, f2, f1
|
|
; PWR10BE-NEXT: xxswapd vs1, vs1
|
|
; PWR10BE-NEXT: xsadddp f1, f2, f1
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: xxswapd vs0, vs0
|
|
; PWR10BE-NEXT: xsadddp f1, f1, f0
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call double @llvm.vector.reduce.fadd.v64f64(double -0.000000e+00, <64 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
;; Ordered fadd reduction of <64 x double> (no fast-math flags on the call),
;; seeded with the scalar argument %b. The expected code accumulates into f1
;; with a chain of scalar xsadddp adds, using xxswapd to reach the other
;; doubleword of each vector; vectors that do not fit in registers are loaded
;; from the stack with lxv.
define dso_local double @v64f64_b(<64 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v64f64_b:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v19, v2
; PWR9LE-NEXT:    lxv v18, 224(r1)
; PWR9LE-NEXT:    lxv v17, 240(r1)
; PWR9LE-NEXT:    lxv v16, 256(r1)
; PWR9LE-NEXT:    lxv v15, 272(r1)
; PWR9LE-NEXT:    xsadddp f1, f1, v19
; PWR9LE-NEXT:    lxv v14, 288(r1)
; PWR9LE-NEXT:    lxv v1, 304(r1)
; PWR9LE-NEXT:    lxv v0, 320(r1)
; PWR9LE-NEXT:    lxv vs13, 336(r1)
; PWR9LE-NEXT:    lxv vs12, 352(r1)
; PWR9LE-NEXT:    lxv vs11, 368(r1)
; PWR9LE-NEXT:    lxv vs10, 384(r1)
; PWR9LE-NEXT:    lxv vs9, 400(r1)
; PWR9LE-NEXT:    lxv vs8, 416(r1)
; PWR9LE-NEXT:    lxv vs7, 432(r1)
; PWR9LE-NEXT:    lxv vs6, 448(r1)
; PWR9LE-NEXT:    lxv vs5, 464(r1)
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v3
; PWR9LE-NEXT:    lxv vs4, 480(r1)
; PWR9LE-NEXT:    lxv vs3, 496(r1)
; PWR9LE-NEXT:    lxv vs2, 512(r1)
; PWR9LE-NEXT:    lxv vs0, 528(r1)
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v4
; PWR9LE-NEXT:    xsadddp f1, f1, v3
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v5
; PWR9LE-NEXT:    xsadddp f1, f1, v4
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v6
; PWR9LE-NEXT:    xsadddp f1, f1, v5
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v7
; PWR9LE-NEXT:    xsadddp f1, f1, v6
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v8
; PWR9LE-NEXT:    xsadddp f1, f1, v7
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v9
; PWR9LE-NEXT:    xsadddp f1, f1, v8
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v10
; PWR9LE-NEXT:    xsadddp f1, f1, v9
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v11
; PWR9LE-NEXT:    xsadddp f1, f1, v10
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v12
; PWR9LE-NEXT:    xsadddp f1, f1, v11
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v13
; PWR9LE-NEXT:    xsadddp f1, f1, v12
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v18
; PWR9LE-NEXT:    xsadddp f1, f1, v13
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v17
; PWR9LE-NEXT:    xsadddp f1, f1, v18
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v16
; PWR9LE-NEXT:    xsadddp f1, f1, v17
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v15
; PWR9LE-NEXT:    xsadddp f1, f1, v16
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v14
; PWR9LE-NEXT:    xsadddp f1, f1, v15
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v1
; PWR9LE-NEXT:    xsadddp f1, f1, v14
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, v0
; PWR9LE-NEXT:    xsadddp f1, f1, v1
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xxswapd v2, vs13
; PWR9LE-NEXT:    xsadddp f1, f1, v0
; PWR9LE-NEXT:    xsadddp f1, f1, v2
; PWR9LE-NEXT:    xsadddp f1, f1, f13
; PWR9LE-NEXT:    xxswapd vs13, vs12
; PWR9LE-NEXT:    xsadddp f1, f1, f13
; PWR9LE-NEXT:    xsadddp f1, f1, f12
; PWR9LE-NEXT:    xxswapd vs12, vs11
; PWR9LE-NEXT:    xsadddp f1, f1, f12
; PWR9LE-NEXT:    xsadddp f1, f1, f11
; PWR9LE-NEXT:    xxswapd vs11, vs10
; PWR9LE-NEXT:    xsadddp f1, f1, f11
; PWR9LE-NEXT:    xsadddp f1, f1, f10
; PWR9LE-NEXT:    xxswapd vs10, vs9
; PWR9LE-NEXT:    xsadddp f1, f1, f10
; PWR9LE-NEXT:    xsadddp f1, f1, f9
; PWR9LE-NEXT:    xxswapd vs9, vs8
; PWR9LE-NEXT:    xsadddp f1, f1, f9
; PWR9LE-NEXT:    xsadddp f1, f1, f8
; PWR9LE-NEXT:    xxswapd vs8, vs7
; PWR9LE-NEXT:    xsadddp f1, f1, f8
; PWR9LE-NEXT:    xsadddp f1, f1, f7
; PWR9LE-NEXT:    xxswapd vs7, vs6
; PWR9LE-NEXT:    xsadddp f1, f1, f7
; PWR9LE-NEXT:    xsadddp f1, f1, f6
; PWR9LE-NEXT:    xxswapd vs6, vs5
; PWR9LE-NEXT:    xsadddp f1, f1, f6
; PWR9LE-NEXT:    xsadddp f1, f1, f5
; PWR9LE-NEXT:    xxswapd vs5, vs4
; PWR9LE-NEXT:    xsadddp f1, f1, f5
; PWR9LE-NEXT:    xsadddp f1, f1, f4
; PWR9LE-NEXT:    xxswapd vs4, vs3
; PWR9LE-NEXT:    xsadddp f1, f1, f4
; PWR9LE-NEXT:    xsadddp f1, f1, f3
; PWR9LE-NEXT:    xxswapd vs3, vs2
; PWR9LE-NEXT:    xsadddp f1, f1, f3
; PWR9LE-NEXT:    xsadddp f1, f1, f2
; PWR9LE-NEXT:    xxswapd vs2, vs0
; PWR9LE-NEXT:    xsadddp f1, f1, f2
; PWR9LE-NEXT:    xsadddp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v64f64_b:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v2
; PWR9BE-NEXT:    lxv v18, 240(r1)
; PWR9BE-NEXT:    lxv v17, 256(r1)
; PWR9BE-NEXT:    lxv v16, 272(r1)
; PWR9BE-NEXT:    lxv v15, 288(r1)
; PWR9BE-NEXT:    lxv v14, 304(r1)
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v3
; PWR9BE-NEXT:    lxv v1, 320(r1)
; PWR9BE-NEXT:    lxv v0, 336(r1)
; PWR9BE-NEXT:    lxv vs13, 352(r1)
; PWR9BE-NEXT:    lxv vs12, 368(r1)
; PWR9BE-NEXT:    lxv vs11, 384(r1)
; PWR9BE-NEXT:    lxv vs10, 400(r1)
; PWR9BE-NEXT:    lxv vs9, 416(r1)
; PWR9BE-NEXT:    lxv vs8, 432(r1)
; PWR9BE-NEXT:    lxv vs7, 448(r1)
; PWR9BE-NEXT:    lxv vs6, 464(r1)
; PWR9BE-NEXT:    lxv vs5, 480(r1)
; PWR9BE-NEXT:    lxv vs4, 496(r1)
; PWR9BE-NEXT:    lxv vs3, 512(r1)
; PWR9BE-NEXT:    lxv vs2, 528(r1)
; PWR9BE-NEXT:    lxv vs0, 544(r1)
; PWR9BE-NEXT:    xsadddp f1, f1, v3
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v4
; PWR9BE-NEXT:    xsadddp f1, f1, v4
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v5
; PWR9BE-NEXT:    xsadddp f1, f1, v5
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v6
; PWR9BE-NEXT:    xsadddp f1, f1, v6
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v7
; PWR9BE-NEXT:    xsadddp f1, f1, v7
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v8
; PWR9BE-NEXT:    xsadddp f1, f1, v8
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v9
; PWR9BE-NEXT:    xsadddp f1, f1, v9
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v10
; PWR9BE-NEXT:    xsadddp f1, f1, v10
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v11
; PWR9BE-NEXT:    xsadddp f1, f1, v11
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v12
; PWR9BE-NEXT:    xsadddp f1, f1, v12
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v13
; PWR9BE-NEXT:    xsadddp f1, f1, v13
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v18
; PWR9BE-NEXT:    xsadddp f1, f1, v18
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v17
; PWR9BE-NEXT:    xsadddp f1, f1, v17
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v16
; PWR9BE-NEXT:    xsadddp f1, f1, v16
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v15
; PWR9BE-NEXT:    xsadddp f1, f1, v15
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v14
; PWR9BE-NEXT:    xsadddp f1, f1, v14
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v1
; PWR9BE-NEXT:    xsadddp f1, f1, v1
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xxswapd v2, v0
; PWR9BE-NEXT:    xsadddp f1, f1, v0
; PWR9BE-NEXT:    xsadddp f1, f1, v2
; PWR9BE-NEXT:    xsadddp f1, f1, f13
; PWR9BE-NEXT:    xxswapd vs13, vs13
; PWR9BE-NEXT:    xsadddp f1, f1, f13
; PWR9BE-NEXT:    xsadddp f1, f1, f12
; PWR9BE-NEXT:    xxswapd vs12, vs12
; PWR9BE-NEXT:    xsadddp f1, f1, f12
; PWR9BE-NEXT:    xsadddp f1, f1, f11
; PWR9BE-NEXT:    xxswapd vs11, vs11
; PWR9BE-NEXT:    xsadddp f1, f1, f11
; PWR9BE-NEXT:    xsadddp f1, f1, f10
; PWR9BE-NEXT:    xxswapd vs10, vs10
; PWR9BE-NEXT:    xsadddp f1, f1, f10
; PWR9BE-NEXT:    xsadddp f1, f1, f9
; PWR9BE-NEXT:    xxswapd vs9, vs9
; PWR9BE-NEXT:    xsadddp f1, f1, f9
; PWR9BE-NEXT:    xsadddp f1, f1, f8
; PWR9BE-NEXT:    xxswapd vs8, vs8
; PWR9BE-NEXT:    xsadddp f1, f1, f8
; PWR9BE-NEXT:    xsadddp f1, f1, f7
; PWR9BE-NEXT:    xxswapd vs7, vs7
; PWR9BE-NEXT:    xsadddp f1, f1, f7
; PWR9BE-NEXT:    xsadddp f1, f1, f6
; PWR9BE-NEXT:    xxswapd vs6, vs6
; PWR9BE-NEXT:    xsadddp f1, f1, f6
; PWR9BE-NEXT:    xsadddp f1, f1, f5
; PWR9BE-NEXT:    xxswapd vs5, vs5
; PWR9BE-NEXT:    xsadddp f1, f1, f5
; PWR9BE-NEXT:    xsadddp f1, f1, f4
; PWR9BE-NEXT:    xxswapd vs4, vs4
; PWR9BE-NEXT:    xsadddp f1, f1, f4
; PWR9BE-NEXT:    xsadddp f1, f1, f3
; PWR9BE-NEXT:    xxswapd vs3, vs3
; PWR9BE-NEXT:    xsadddp f1, f1, f3
; PWR9BE-NEXT:    xsadddp f1, f1, f2
; PWR9BE-NEXT:    xxswapd vs2, vs2
; PWR9BE-NEXT:    xsadddp f1, f1, f2
; PWR9BE-NEXT:    xsadddp f1, f1, f0
; PWR9BE-NEXT:    xxswapd vs0, vs0
; PWR9BE-NEXT:    xsadddp f1, f1, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v64f64_b:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v19, v2
; PWR10LE-NEXT:    lxv v18, 224(r1)
; PWR10LE-NEXT:    lxv v17, 240(r1)
; PWR10LE-NEXT:    xsadddp f1, f1, v19
; PWR10LE-NEXT:    lxv v16, 256(r1)
; PWR10LE-NEXT:    lxv v15, 272(r1)
; PWR10LE-NEXT:    lxv v14, 288(r1)
; PWR10LE-NEXT:    lxv v1, 304(r1)
; PWR10LE-NEXT:    lxv v0, 320(r1)
; PWR10LE-NEXT:    lxv vs13, 336(r1)
; PWR10LE-NEXT:    lxv vs12, 352(r1)
; PWR10LE-NEXT:    lxv vs11, 368(r1)
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v3
; PWR10LE-NEXT:    lxv vs10, 384(r1)
; PWR10LE-NEXT:    lxv vs9, 400(r1)
; PWR10LE-NEXT:    lxv vs8, 416(r1)
; PWR10LE-NEXT:    lxv vs7, 432(r1)
; PWR10LE-NEXT:    lxv vs6, 448(r1)
; PWR10LE-NEXT:    lxv vs5, 464(r1)
; PWR10LE-NEXT:    lxv vs4, 480(r1)
; PWR10LE-NEXT:    lxv vs3, 496(r1)
; PWR10LE-NEXT:    lxv vs2, 512(r1)
; PWR10LE-NEXT:    lxv vs0, 528(r1)
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v4
; PWR10LE-NEXT:    xsadddp f1, f1, v3
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v5
; PWR10LE-NEXT:    xsadddp f1, f1, v4
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v6
; PWR10LE-NEXT:    xsadddp f1, f1, v5
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v7
; PWR10LE-NEXT:    xsadddp f1, f1, v6
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v8
; PWR10LE-NEXT:    xsadddp f1, f1, v7
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v9
; PWR10LE-NEXT:    xsadddp f1, f1, v8
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v10
; PWR10LE-NEXT:    xsadddp f1, f1, v9
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v11
; PWR10LE-NEXT:    xsadddp f1, f1, v10
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v12
; PWR10LE-NEXT:    xsadddp f1, f1, v11
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v13
; PWR10LE-NEXT:    xsadddp f1, f1, v12
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v18
; PWR10LE-NEXT:    xsadddp f1, f1, v13
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v17
; PWR10LE-NEXT:    xsadddp f1, f1, v18
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v16
; PWR10LE-NEXT:    xsadddp f1, f1, v17
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v15
; PWR10LE-NEXT:    xsadddp f1, f1, v16
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v14
; PWR10LE-NEXT:    xsadddp f1, f1, v15
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v1
; PWR10LE-NEXT:    xsadddp f1, f1, v14
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, v0
; PWR10LE-NEXT:    xsadddp f1, f1, v1
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xxswapd v2, vs13
; PWR10LE-NEXT:    xsadddp f1, f1, v0
; PWR10LE-NEXT:    xsadddp f1, f1, v2
; PWR10LE-NEXT:    xsadddp f1, f1, f13
; PWR10LE-NEXT:    xxswapd vs13, vs12
; PWR10LE-NEXT:    xsadddp f1, f1, f13
; PWR10LE-NEXT:    xsadddp f1, f1, f12
; PWR10LE-NEXT:    xxswapd vs12, vs11
; PWR10LE-NEXT:    xsadddp f1, f1, f12
; PWR10LE-NEXT:    xsadddp f1, f1, f11
; PWR10LE-NEXT:    xxswapd vs11, vs10
; PWR10LE-NEXT:    xsadddp f1, f1, f11
; PWR10LE-NEXT:    xsadddp f1, f1, f10
; PWR10LE-NEXT:    xxswapd vs10, vs9
; PWR10LE-NEXT:    xsadddp f1, f1, f10
; PWR10LE-NEXT:    xsadddp f1, f1, f9
; PWR10LE-NEXT:    xxswapd vs9, vs8
; PWR10LE-NEXT:    xsadddp f1, f1, f9
; PWR10LE-NEXT:    xsadddp f1, f1, f8
; PWR10LE-NEXT:    xxswapd vs8, vs7
; PWR10LE-NEXT:    xsadddp f1, f1, f8
; PWR10LE-NEXT:    xsadddp f1, f1, f7
; PWR10LE-NEXT:    xxswapd vs7, vs6
; PWR10LE-NEXT:    xsadddp f1, f1, f7
; PWR10LE-NEXT:    xsadddp f1, f1, f6
; PWR10LE-NEXT:    xxswapd vs6, vs5
; PWR10LE-NEXT:    xsadddp f1, f1, f6
; PWR10LE-NEXT:    xsadddp f1, f1, f5
; PWR10LE-NEXT:    xxswapd vs5, vs4
; PWR10LE-NEXT:    xsadddp f1, f1, f5
; PWR10LE-NEXT:    xsadddp f1, f1, f4
; PWR10LE-NEXT:    xxswapd vs4, vs3
; PWR10LE-NEXT:    xsadddp f1, f1, f4
; PWR10LE-NEXT:    xsadddp f1, f1, f3
; PWR10LE-NEXT:    xxswapd vs3, vs2
; PWR10LE-NEXT:    xsadddp f1, f1, f3
; PWR10LE-NEXT:    xsadddp f1, f1, f2
; PWR10LE-NEXT:    xxswapd vs2, vs0
; PWR10LE-NEXT:    xsadddp f1, f1, f2
; PWR10LE-NEXT:    xsadddp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v64f64_b:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v2
; PWR10BE-NEXT:    lxv v18, 240(r1)
; PWR10BE-NEXT:    lxv v17, 256(r1)
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v3
; PWR10BE-NEXT:    lxv v16, 272(r1)
; PWR10BE-NEXT:    lxv v15, 288(r1)
; PWR10BE-NEXT:    lxv v14, 304(r1)
; PWR10BE-NEXT:    lxv v1, 320(r1)
; PWR10BE-NEXT:    lxv v0, 336(r1)
; PWR10BE-NEXT:    lxv vs13, 352(r1)
; PWR10BE-NEXT:    lxv vs12, 368(r1)
; PWR10BE-NEXT:    lxv vs11, 384(r1)
; PWR10BE-NEXT:    lxv vs10, 400(r1)
; PWR10BE-NEXT:    lxv vs9, 416(r1)
; PWR10BE-NEXT:    xsadddp f1, f1, v3
; PWR10BE-NEXT:    lxv vs8, 432(r1)
; PWR10BE-NEXT:    lxv vs7, 448(r1)
; PWR10BE-NEXT:    lxv vs6, 464(r1)
; PWR10BE-NEXT:    lxv vs5, 480(r1)
; PWR10BE-NEXT:    lxv vs4, 496(r1)
; PWR10BE-NEXT:    lxv vs3, 512(r1)
; PWR10BE-NEXT:    lxv vs2, 528(r1)
; PWR10BE-NEXT:    lxv vs0, 544(r1)
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v4
; PWR10BE-NEXT:    xsadddp f1, f1, v4
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v5
; PWR10BE-NEXT:    xsadddp f1, f1, v5
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v6
; PWR10BE-NEXT:    xsadddp f1, f1, v6
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v7
; PWR10BE-NEXT:    xsadddp f1, f1, v7
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v8
; PWR10BE-NEXT:    xsadddp f1, f1, v8
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v9
; PWR10BE-NEXT:    xsadddp f1, f1, v9
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v10
; PWR10BE-NEXT:    xsadddp f1, f1, v10
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v11
; PWR10BE-NEXT:    xsadddp f1, f1, v11
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v12
; PWR10BE-NEXT:    xsadddp f1, f1, v12
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v13
; PWR10BE-NEXT:    xsadddp f1, f1, v13
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v18
; PWR10BE-NEXT:    xsadddp f1, f1, v18
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v17
; PWR10BE-NEXT:    xsadddp f1, f1, v17
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v16
; PWR10BE-NEXT:    xsadddp f1, f1, v16
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v15
; PWR10BE-NEXT:    xsadddp f1, f1, v15
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v14
; PWR10BE-NEXT:    xsadddp f1, f1, v14
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v1
; PWR10BE-NEXT:    xsadddp f1, f1, v1
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xxswapd v2, v0
; PWR10BE-NEXT:    xsadddp f1, f1, v0
; PWR10BE-NEXT:    xsadddp f1, f1, v2
; PWR10BE-NEXT:    xsadddp f1, f1, f13
; PWR10BE-NEXT:    xxswapd vs13, vs13
; PWR10BE-NEXT:    xsadddp f1, f1, f13
; PWR10BE-NEXT:    xsadddp f1, f1, f12
; PWR10BE-NEXT:    xxswapd vs12, vs12
; PWR10BE-NEXT:    xsadddp f1, f1, f12
; PWR10BE-NEXT:    xsadddp f1, f1, f11
; PWR10BE-NEXT:    xxswapd vs11, vs11
; PWR10BE-NEXT:    xsadddp f1, f1, f11
; PWR10BE-NEXT:    xsadddp f1, f1, f10
; PWR10BE-NEXT:    xxswapd vs10, vs10
; PWR10BE-NEXT:    xsadddp f1, f1, f10
; PWR10BE-NEXT:    xsadddp f1, f1, f9
; PWR10BE-NEXT:    xxswapd vs9, vs9
; PWR10BE-NEXT:    xsadddp f1, f1, f9
; PWR10BE-NEXT:    xsadddp f1, f1, f8
; PWR10BE-NEXT:    xxswapd vs8, vs8
; PWR10BE-NEXT:    xsadddp f1, f1, f8
; PWR10BE-NEXT:    xsadddp f1, f1, f7
; PWR10BE-NEXT:    xxswapd vs7, vs7
; PWR10BE-NEXT:    xsadddp f1, f1, f7
; PWR10BE-NEXT:    xsadddp f1, f1, f6
; PWR10BE-NEXT:    xxswapd vs6, vs6
; PWR10BE-NEXT:    xsadddp f1, f1, f6
; PWR10BE-NEXT:    xsadddp f1, f1, f5
; PWR10BE-NEXT:    xxswapd vs5, vs5
; PWR10BE-NEXT:    xsadddp f1, f1, f5
; PWR10BE-NEXT:    xsadddp f1, f1, f4
; PWR10BE-NEXT:    xxswapd vs4, vs4
; PWR10BE-NEXT:    xsadddp f1, f1, f4
; PWR10BE-NEXT:    xsadddp f1, f1, f3
; PWR10BE-NEXT:    xxswapd vs3, vs3
; PWR10BE-NEXT:    xsadddp f1, f1, f3
; PWR10BE-NEXT:    xsadddp f1, f1, f2
; PWR10BE-NEXT:    xxswapd vs2, vs2
; PWR10BE-NEXT:    xsadddp f1, f1, f2
; PWR10BE-NEXT:    xsadddp f1, f1, f0
; PWR10BE-NEXT:    xxswapd vs0, vs0
; PWR10BE-NEXT:    xsadddp f1, f1, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call double @llvm.vector.reduce.fadd.v64f64(double %b, <64 x double> %a)
  ret double %0
}
|
|
|
|
;; fadd reduction of <64 x double> with the `fast` flag on the call and a
;; -0.0 start value. The expected code combines the inputs pairwise with
;; vector xvadddp adds and finishes with an xxswapd-based step on the last
;; remaining vector.
define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v64f64_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    lxv vs0, 368(r1)
; PWR9LE-NEXT:    lxv vs1, 496(r1)
; PWR9LE-NEXT:    lxv vs2, 240(r1)
; PWR9LE-NEXT:    lxv vs3, 304(r1)
; PWR9LE-NEXT:    xvadddp vs3, v3, vs3
; PWR9LE-NEXT:    lxv vs4, 432(r1)
; PWR9LE-NEXT:    lxv vs5, 400(r1)
; PWR9LE-NEXT:    lxv vs6, 528(r1)
; PWR9LE-NEXT:    lxv vs7, 272(r1)
; PWR9LE-NEXT:    lxv vs8, 336(r1)
; PWR9LE-NEXT:    lxv vs9, 464(r1)
; PWR9LE-NEXT:    lxv vs10, 352(r1)
; PWR9LE-NEXT:    lxv vs11, 480(r1)
; PWR9LE-NEXT:    lxv vs12, 224(r1)
; PWR9LE-NEXT:    lxv vs13, 288(r1)
; PWR9LE-NEXT:    lxv v0, 416(r1)
; PWR9LE-NEXT:    lxv v1, 384(r1)
; PWR9LE-NEXT:    lxv v14, 512(r1)
; PWR9LE-NEXT:    lxv v15, 256(r1)
; PWR9LE-NEXT:    lxv v16, 320(r1)
; PWR9LE-NEXT:    lxv v17, 448(r1)
; PWR9LE-NEXT:    xvadddp v12, v12, v17
; PWR9LE-NEXT:    xvadddp v4, v4, v16
; PWR9LE-NEXT:    xvadddp v14, v15, v14
; PWR9LE-NEXT:    xvadddp v1, v8, v1
; PWR9LE-NEXT:    xvadddp v0, v10, v0
; PWR9LE-NEXT:    xvadddp vs13, v2, vs13
; PWR9LE-NEXT:    xvadddp vs11, vs12, vs11
; PWR9LE-NEXT:    xvadddp vs10, v6, vs10
; PWR9LE-NEXT:    xvadddp vs9, v13, vs9
; PWR9LE-NEXT:    xvadddp vs8, v5, vs8
; PWR9LE-NEXT:    xvadddp vs6, vs7, vs6
; PWR9LE-NEXT:    xvadddp vs5, v9, vs5
; PWR9LE-NEXT:    xvadddp vs4, v11, vs4
; PWR9LE-NEXT:    xvadddp vs1, vs2, vs1
; PWR9LE-NEXT:    xvadddp vs0, v7, vs0
; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
; PWR9LE-NEXT:    xvadddp vs1, vs3, vs4
; PWR9LE-NEXT:    xvadddp vs2, vs5, vs6
; PWR9LE-NEXT:    xvadddp vs3, vs8, vs9
; PWR9LE-NEXT:    xvadddp vs4, vs10, vs11
; PWR9LE-NEXT:    xvadddp vs5, vs13, v0
; PWR9LE-NEXT:    xvadddp vs6, v1, v14
; PWR9LE-NEXT:    xvadddp vs7, v4, v12
; PWR9LE-NEXT:    xvadddp vs6, vs7, vs6
; PWR9LE-NEXT:    xvadddp vs4, vs5, vs4
; PWR9LE-NEXT:    xvadddp vs2, vs3, vs2
; PWR9LE-NEXT:    xvadddp vs0, vs1, vs0
; PWR9LE-NEXT:    xvadddp vs0, vs0, vs2
; PWR9LE-NEXT:    xvadddp vs1, vs4, vs6
; PWR9LE-NEXT:    xvadddp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v64f64_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    lxv vs0, 384(r1)
; PWR9BE-NEXT:    lxv vs1, 512(r1)
; PWR9BE-NEXT:    lxv vs2, 256(r1)
; PWR9BE-NEXT:    lxv vs3, 320(r1)
; PWR9BE-NEXT:    xvadddp vs3, v3, vs3
; PWR9BE-NEXT:    lxv vs4, 448(r1)
; PWR9BE-NEXT:    lxv vs5, 416(r1)
; PWR9BE-NEXT:    lxv vs6, 544(r1)
; PWR9BE-NEXT:    lxv vs7, 288(r1)
; PWR9BE-NEXT:    lxv vs8, 352(r1)
; PWR9BE-NEXT:    lxv vs9, 480(r1)
; PWR9BE-NEXT:    lxv vs10, 368(r1)
; PWR9BE-NEXT:    lxv vs11, 496(r1)
; PWR9BE-NEXT:    lxv vs12, 240(r1)
; PWR9BE-NEXT:    lxv vs13, 304(r1)
; PWR9BE-NEXT:    lxv v0, 432(r1)
; PWR9BE-NEXT:    lxv v1, 400(r1)
; PWR9BE-NEXT:    lxv v14, 528(r1)
; PWR9BE-NEXT:    lxv v15, 272(r1)
; PWR9BE-NEXT:    lxv v16, 336(r1)
; PWR9BE-NEXT:    lxv v17, 464(r1)
; PWR9BE-NEXT:    xvadddp v12, v12, v17
; PWR9BE-NEXT:    xvadddp v4, v4, v16
; PWR9BE-NEXT:    xvadddp v14, v15, v14
; PWR9BE-NEXT:    xvadddp v1, v8, v1
; PWR9BE-NEXT:    xvadddp v0, v10, v0
; PWR9BE-NEXT:    xvadddp vs13, v2, vs13
; PWR9BE-NEXT:    xvadddp vs11, vs12, vs11
; PWR9BE-NEXT:    xvadddp vs10, v6, vs10
; PWR9BE-NEXT:    xvadddp vs9, v13, vs9
; PWR9BE-NEXT:    xvadddp vs8, v5, vs8
; PWR9BE-NEXT:    xvadddp vs6, vs7, vs6
; PWR9BE-NEXT:    xvadddp vs5, v9, vs5
; PWR9BE-NEXT:    xvadddp vs4, v11, vs4
; PWR9BE-NEXT:    xvadddp vs1, vs2, vs1
; PWR9BE-NEXT:    xvadddp vs0, v7, vs0
; PWR9BE-NEXT:    xvadddp vs0, vs0, vs1
; PWR9BE-NEXT:    xvadddp vs1, vs3, vs4
; PWR9BE-NEXT:    xvadddp vs2, vs5, vs6
; PWR9BE-NEXT:    xvadddp vs3, vs8, vs9
; PWR9BE-NEXT:    xvadddp vs4, vs10, vs11
; PWR9BE-NEXT:    xvadddp vs5, vs13, v0
; PWR9BE-NEXT:    xvadddp vs6, v1, v14
; PWR9BE-NEXT:    xvadddp vs7, v4, v12
; PWR9BE-NEXT:    xvadddp vs6, vs7, vs6
; PWR9BE-NEXT:    xvadddp vs4, vs5, vs4
; PWR9BE-NEXT:    xvadddp vs2, vs3, vs2
; PWR9BE-NEXT:    xvadddp vs0, vs1, vs0
; PWR9BE-NEXT:    xvadddp vs0, vs0, vs2
; PWR9BE-NEXT:    xvadddp vs1, vs4, vs6
; PWR9BE-NEXT:    xvadddp vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v64f64_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    lxv vs0, 368(r1)
; PWR10LE-NEXT:    lxv vs1, 496(r1)
; PWR10LE-NEXT:    xvadddp vs0, v7, vs0
; PWR10LE-NEXT:    lxv vs2, 240(r1)
; PWR10LE-NEXT:    lxv vs3, 304(r1)
; PWR10LE-NEXT:    lxv vs4, 432(r1)
; PWR10LE-NEXT:    lxv vs5, 400(r1)
; PWR10LE-NEXT:    lxv vs6, 528(r1)
; PWR10LE-NEXT:    lxv vs7, 272(r1)
; PWR10LE-NEXT:    lxv vs8, 336(r1)
; PWR10LE-NEXT:    lxv vs9, 464(r1)
; PWR10LE-NEXT:    lxv vs10, 352(r1)
; PWR10LE-NEXT:    lxv vs11, 480(r1)
; PWR10LE-NEXT:    lxv vs12, 224(r1)
; PWR10LE-NEXT:    lxv vs13, 288(r1)
; PWR10LE-NEXT:    xvadddp vs13, v2, vs13
; PWR10LE-NEXT:    xvadddp vs11, vs12, vs11
; PWR10LE-NEXT:    xvadddp vs10, v6, vs10
; PWR10LE-NEXT:    xvadddp vs9, v13, vs9
; PWR10LE-NEXT:    xvadddp vs8, v5, vs8
; PWR10LE-NEXT:    xvadddp vs6, vs7, vs6
; PWR10LE-NEXT:    xvadddp vs5, v9, vs5
; PWR10LE-NEXT:    xvadddp vs4, v11, vs4
; PWR10LE-NEXT:    xvadddp vs3, v3, vs3
; PWR10LE-NEXT:    xvadddp vs1, vs2, vs1
; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
; PWR10LE-NEXT:    lxv v0, 416(r1)
; PWR10LE-NEXT:    lxv v1, 384(r1)
; PWR10LE-NEXT:    lxv v14, 512(r1)
; PWR10LE-NEXT:    lxv v15, 256(r1)
; PWR10LE-NEXT:    lxv v16, 320(r1)
; PWR10LE-NEXT:    lxv v17, 448(r1)
; PWR10LE-NEXT:    xvadddp v12, v12, v17
; PWR10LE-NEXT:    xvadddp v4, v4, v16
; PWR10LE-NEXT:    xvadddp v14, v15, v14
; PWR10LE-NEXT:    xvadddp v1, v8, v1
; PWR10LE-NEXT:    xvadddp v0, v10, v0
; PWR10LE-NEXT:    xvadddp vs1, vs3, vs4
; PWR10LE-NEXT:    xvadddp vs2, vs5, vs6
; PWR10LE-NEXT:    xvadddp vs3, vs8, vs9
; PWR10LE-NEXT:    xvadddp vs4, vs10, vs11
; PWR10LE-NEXT:    xvadddp vs5, vs13, v0
; PWR10LE-NEXT:    xvadddp vs6, v1, v14
; PWR10LE-NEXT:    xvadddp vs7, v4, v12
; PWR10LE-NEXT:    xvadddp vs6, vs7, vs6
; PWR10LE-NEXT:    xvadddp vs4, vs5, vs4
; PWR10LE-NEXT:    xvadddp vs2, vs3, vs2
; PWR10LE-NEXT:    xvadddp vs0, vs1, vs0
; PWR10LE-NEXT:    xvadddp vs0, vs0, vs2
; PWR10LE-NEXT:    xvadddp vs1, vs4, vs6
; PWR10LE-NEXT:    xvadddp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v64f64_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    lxv vs0, 384(r1)
; PWR10BE-NEXT:    lxv vs1, 512(r1)
; PWR10BE-NEXT:    xvadddp vs0, v7, vs0
; PWR10BE-NEXT:    lxv vs2, 256(r1)
; PWR10BE-NEXT:    lxv vs3, 320(r1)
; PWR10BE-NEXT:    lxv vs4, 448(r1)
; PWR10BE-NEXT:    lxv vs5, 416(r1)
; PWR10BE-NEXT:    lxv vs6, 544(r1)
; PWR10BE-NEXT:    lxv vs7, 288(r1)
; PWR10BE-NEXT:    lxv vs8, 352(r1)
; PWR10BE-NEXT:    lxv vs9, 480(r1)
; PWR10BE-NEXT:    lxv vs10, 368(r1)
; PWR10BE-NEXT:    lxv vs11, 496(r1)
; PWR10BE-NEXT:    lxv vs12, 240(r1)
; PWR10BE-NEXT:    lxv vs13, 304(r1)
; PWR10BE-NEXT:    xvadddp vs13, v2, vs13
; PWR10BE-NEXT:    xvadddp vs11, vs12, vs11
; PWR10BE-NEXT:    xvadddp vs10, v6, vs10
; PWR10BE-NEXT:    xvadddp vs9, v13, vs9
; PWR10BE-NEXT:    xvadddp vs8, v5, vs8
; PWR10BE-NEXT:    xvadddp vs6, vs7, vs6
; PWR10BE-NEXT:    xvadddp vs5, v9, vs5
; PWR10BE-NEXT:    xvadddp vs4, v11, vs4
; PWR10BE-NEXT:    xvadddp vs3, v3, vs3
; PWR10BE-NEXT:    xvadddp vs1, vs2, vs1
; PWR10BE-NEXT:    xvadddp vs0, vs0, vs1
; PWR10BE-NEXT:    lxv v0, 432(r1)
; PWR10BE-NEXT:    lxv v1, 400(r1)
; PWR10BE-NEXT:    lxv v14, 528(r1)
; PWR10BE-NEXT:    lxv v15, 272(r1)
; PWR10BE-NEXT:    lxv v16, 336(r1)
; PWR10BE-NEXT:    lxv v17, 464(r1)
; PWR10BE-NEXT:    xvadddp v12, v12, v17
; PWR10BE-NEXT:    xvadddp v4, v4, v16
; PWR10BE-NEXT:    xvadddp v14, v15, v14
; PWR10BE-NEXT:    xvadddp v1, v8, v1
; PWR10BE-NEXT:    xvadddp v0, v10, v0
; PWR10BE-NEXT:    xvadddp vs1, vs3, vs4
; PWR10BE-NEXT:    xvadddp vs2, vs5, vs6
; PWR10BE-NEXT:    xvadddp vs3, vs8, vs9
; PWR10BE-NEXT:    xvadddp vs4, vs10, vs11
; PWR10BE-NEXT:    xvadddp vs5, vs13, v0
; PWR10BE-NEXT:    xvadddp vs6, v1, v14
; PWR10BE-NEXT:    xvadddp vs7, v4, v12
; PWR10BE-NEXT:    xvadddp vs6, vs7, vs6
; PWR10BE-NEXT:    xvadddp vs4, vs5, vs4
; PWR10BE-NEXT:    xvadddp vs2, vs3, vs2
; PWR10BE-NEXT:    xvadddp vs0, vs1, vs0
; PWR10BE-NEXT:    xvadddp vs0, vs0, vs2
; PWR10BE-NEXT:    xvadddp vs1, vs4, vs6
; PWR10BE-NEXT:    xvadddp vs0, vs1, vs0
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast double @llvm.vector.reduce.fadd.v64f64(double -0.000000e+00, <64 x double> %a)
  ret double %0
}
|
|
|
|
;; Declarations of the llvm.vector.reduce.fadd intrinsics for f64 vectors of
;; 2 to 64 elements. Each takes a scalar start value followed by the vector.
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) #0
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) #0
declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) #0
declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) #0
declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) #0
declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) #0
|
|
|
|
;;
;; Vectors of ppc_fp128
;;
|
|
;; fadd reduction of <2 x ppc_fp128> with the constant start value
;; 0xM80000000000000000000000000000000. On every tested configuration the
;; expected code is a single call to the __gcc_qadd runtime routine wrapped
;; in a link-register save/restore and stack-frame setup/teardown.
define dso_local ppc_fp128 @v2ppcf128(<2 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    mflr r0
; PWR9LE-NEXT:    stdu r1, -32(r1)
; PWR9LE-NEXT:    std r0, 48(r1)
; PWR9LE-NEXT:    bl __gcc_qadd
; PWR9LE-NEXT:    nop
; PWR9LE-NEXT:    addi r1, r1, 32
; PWR9LE-NEXT:    ld r0, 16(r1)
; PWR9LE-NEXT:    mtlr r0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2ppcf128:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    mflr r0
; PWR9BE-NEXT:    stdu r1, -112(r1)
; PWR9BE-NEXT:    std r0, 128(r1)
; PWR9BE-NEXT:    bl __gcc_qadd
; PWR9BE-NEXT:    nop
; PWR9BE-NEXT:    addi r1, r1, 112
; PWR9BE-NEXT:    ld r0, 16(r1)
; PWR9BE-NEXT:    mtlr r0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2ppcf128:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    mflr r0
; PWR10LE-NEXT:    std r0, 16(r1)
; PWR10LE-NEXT:    stdu r1, -32(r1)
; PWR10LE-NEXT:    bl __gcc_qadd@notoc
; PWR10LE-NEXT:    addi r1, r1, 32
; PWR10LE-NEXT:    ld r0, 16(r1)
; PWR10LE-NEXT:    mtlr r0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2ppcf128:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    mflr r0
; PWR10BE-NEXT:    std r0, 16(r1)
; PWR10BE-NEXT:    stdu r1, -112(r1)
; PWR10BE-NEXT:    bl __gcc_qadd
; PWR10BE-NEXT:    nop
; PWR10BE-NEXT:    addi r1, r1, 112
; PWR10BE-NEXT:    ld r0, 16(r1)
; PWR10BE-NEXT:    mtlr r0
; PWR10BE-NEXT:    blr
entry:
  %0 = call ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <2 x ppc_fp128> %a)
  ret ppc_fp128 %0
}
|
|
|
|
; Ordered fadd reduction (no fast-math flags) of a <2 x ppc_fp128> vector with
; a caller-supplied start value %b.
; The expected output for every run configuration contains two back-to-back
; calls to __gcc_qadd. Before the first call f5/f6 are moved into f1/f2 and the
; incoming f1/f2 into f3/f4; the incoming f3/f4 pair is parked in f30/f31 and
; fed to the second call. f30/f31 are spilled and reloaded around the calls.
; NOTE(review): presumably f1..f4 carry %a and f5/f6 carry %b - confirm
; against the PowerPC calling convention.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v2ppcf128_b(<2 x ppc_fp128> %a, ppc_fp128 %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stdu r1, -48(r1)
; PWR9LE-NEXT: fmr f31, f4
; PWR9LE-NEXT: fmr f30, f3
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f1, f5
; PWR9LE-NEXT: fmr f2, f6
; PWR9LE-NEXT: std r0, 64(r1)
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 48
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -128(r1)
; PWR9BE-NEXT: std r0, 144(r1)
; PWR9BE-NEXT: stfd f30, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f4
; PWR9BE-NEXT: fmr f30, f3
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f1, f5
; PWR9BE-NEXT: fmr f2, f6
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 128
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2ppcf128_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -48(r1)
; PWR10LE-NEXT: fmr f31, f4
; PWR10LE-NEXT: fmr f30, f3
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f1, f5
; PWR10LE-NEXT: fmr f2, f6
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 48
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2ppcf128_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -128(r1)
; PWR10BE-NEXT: stfd f30, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f4
; PWR10BE-NEXT: fmr f30, f3
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f1, f5
; PWR10BE-NEXT: fmr f2, f6
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 128
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
  %0 = call ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 %b, <2 x ppc_fp128> %a)
  ret ppc_fp128 %0
}

; fadd reduction of a <2 x ppc_fp128> vector with the `fast` flag on the call,
; seeded with the constant 0xM8000...0 (negative zero).
; The expected output for every run configuration contains a single call to
; __gcc_qadd. Unlike the non-fast variant, the result in f1/f2 is then stored
; to the stack (stfd), reloaded as one vector (lxv) and split again with
; xxswapd before the frame is torn down.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stdu r1, -64(r1)
; PWR9LE-NEXT: std r0, 80(r1)
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: stfd f2, 40(r1)
; PWR9LE-NEXT: stfd f1, 32(r1)
; PWR9LE-NEXT: lxv vs1, 32(r1)
; PWR9LE-NEXT: xxswapd vs2, vs1
; PWR9LE-NEXT: addi r1, r1, 64
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -144(r1)
; PWR9BE-NEXT: std r0, 160(r1)
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: stfd f2, 120(r1)
; PWR9BE-NEXT: stfd f1, 112(r1)
; PWR9BE-NEXT: lxv vs1, 112(r1)
; PWR9BE-NEXT: xxswapd vs2, vs1
; PWR9BE-NEXT: addi r1, r1, 144
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2ppcf128_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -64(r1)
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: stfd f2, 40(r1)
; PWR10LE-NEXT: stfd f1, 32(r1)
; PWR10LE-NEXT: lxv vs1, 32(r1)
; PWR10LE-NEXT: xxswapd vs2, vs1
; PWR10LE-NEXT: addi r1, r1, 64
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2ppcf128_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -144(r1)
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: stfd f2, 120(r1)
; PWR10BE-NEXT: stfd f1, 112(r1)
; PWR10BE-NEXT: lxv vs1, 112(r1)
; PWR10BE-NEXT: xxswapd vs2, vs1
; PWR10BE-NEXT: addi r1, r1, 144
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <2 x ppc_fp128> %a)
  ret ppc_fp128 %0
}

; Ordered fadd reduction (no fast-math flags) of a <4 x ppc_fp128> vector,
; seeded with the constant 0xM8000...0 (negative zero).
; The expected output for every run configuration contains three chained
; calls to __gcc_qadd: the first uses the incoming f1..f4 untouched, the
; second adds the pair saved from f5/f6 (kept in f28/f29), and the third adds
; the pair saved from f7/f8 (kept in f30/f31). f28..f31 are spilled and
; reloaded around the calls.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v4ppcf128(<4 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stdu r1, -64(r1)
; PWR9LE-NEXT: std r0, 80(r1)
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: fmr f29, f6
; PWR9LE-NEXT: fmr f28, f5
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f28
; PWR9LE-NEXT: fmr f4, f29
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 64
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -144(r1)
; PWR9BE-NEXT: std r0, 160(r1)
; PWR9BE-NEXT: stfd f28, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: fmr f29, f6
; PWR9BE-NEXT: fmr f28, f5
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f28
; PWR9BE-NEXT: fmr f4, f29
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 144
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -64(r1)
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: fmr f29, f6
; PWR10LE-NEXT: fmr f28, f5
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f28
; PWR10LE-NEXT: fmr f4, f29
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 64
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -144(r1)
; PWR10BE-NEXT: stfd f28, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f29, f6
; PWR10BE-NEXT: fmr f28, f5
; PWR10BE-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f28
; PWR10BE-NEXT: fmr f4, f29
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 144
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
  %0 = call ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <4 x ppc_fp128> %a)
  ret ppc_fp128 %0
}

; Ordered fadd reduction (no fast-math flags) of a <4 x ppc_fp128> vector with
; a caller-supplied start value %b.
; The expected output for every run configuration contains four chained calls
; to __gcc_qadd. Before the first call f9/f10 are moved into f1/f2 and the
; incoming f1/f2 into f3/f4; the remaining incoming pairs (f3/f4, f5/f6,
; f7/f8) are parked in f26/f27, f28/f29 and f30/f31 and fed, in that order, to
; the following three calls. f26..f31 are spilled and reloaded around them.
; NOTE(review): presumably f1..f8 carry %a and f9/f10 carry %b - confirm
; against the PowerPC calling convention.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v4ppcf128_b(<4 x ppc_fp128> %a, ppc_fp128 %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stdu r1, -80(r1)
; PWR9LE-NEXT: fmr f27, f4
; PWR9LE-NEXT: fmr f26, f3
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f1, f9
; PWR9LE-NEXT: fmr f2, f10
; PWR9LE-NEXT: std r0, 96(r1)
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: fmr f29, f6
; PWR9LE-NEXT: fmr f28, f5
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f26
; PWR9LE-NEXT: fmr f4, f27
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f28
; PWR9LE-NEXT: fmr f4, f29
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 80
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -160(r1)
; PWR9BE-NEXT: std r0, 176(r1)
; PWR9BE-NEXT: stfd f26, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f27, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f27, f4
; PWR9BE-NEXT: fmr f26, f3
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f1, f9
; PWR9BE-NEXT: fmr f2, f10
; PWR9BE-NEXT: stfd f28, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: fmr f29, f6
; PWR9BE-NEXT: fmr f28, f5
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f26
; PWR9BE-NEXT: fmr f4, f27
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f28
; PWR9BE-NEXT: fmr f4, f29
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f27, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f26, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 160
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -80(r1)
; PWR10LE-NEXT: fmr f27, f4
; PWR10LE-NEXT: fmr f26, f3
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f1, f9
; PWR10LE-NEXT: fmr f2, f10
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: fmr f29, f6
; PWR10LE-NEXT: fmr f28, f5
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f26
; PWR10LE-NEXT: fmr f4, f27
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f28
; PWR10LE-NEXT: fmr f4, f29
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 80
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -160(r1)
; PWR10BE-NEXT: stfd f26, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f27, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f27, f4
; PWR10BE-NEXT: fmr f26, f3
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f1, f9
; PWR10BE-NEXT: stfd f28, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f2, f10
; PWR10BE-NEXT: fmr f29, f6
; PWR10BE-NEXT: fmr f28, f5
; PWR10BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f26
; PWR10BE-NEXT: fmr f4, f27
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f28
; PWR10BE-NEXT: fmr f4, f29
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f27, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f26, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 160
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
  %0 = call ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 %b, <4 x ppc_fp128> %a)
  ret ppc_fp128 %0
}

; fadd reduction of a <4 x ppc_fp128> vector with the `fast` flag on the call,
; seeded with the constant 0xM8000...0 (negative zero).
; The expected output for every run configuration contains three calls to
; __gcc_qadd arranged as two independent partial sums plus a final combine:
;   1. incoming f1/f2 with incoming f5/f6 (result parked in f27/f26),
;   2. incoming f3/f4 (saved in f28/f29) with incoming f7/f8 (saved in f30/f31),
;   3. the two partial results.
; NOTE(review): presumably this pairs elements 0+2 and 1+3 of %a - confirm
; against the PowerPC calling convention for <4 x ppc_fp128>.
; As in the two-element fast variant, the final f1/f2 result is stored to the
; stack (stfd), reloaded as one vector (lxv) and split with xxswapd.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stdu r1, -96(r1)
; PWR9LE-NEXT: fmr f29, f4
; PWR9LE-NEXT: fmr f28, f3
; PWR9LE-NEXT: fmr f3, f5
; PWR9LE-NEXT: fmr f4, f6
; PWR9LE-NEXT: std r0, 112(r1)
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f27, f1
; PWR9LE-NEXT: fmr f26, f2
; PWR9LE-NEXT: fmr f1, f28
; PWR9LE-NEXT: fmr f2, f29
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f1, f27
; PWR9LE-NEXT: fmr f2, f26
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: stfd f2, 40(r1)
; PWR9LE-NEXT: stfd f1, 32(r1)
; PWR9LE-NEXT: lxv vs1, 32(r1)
; PWR9LE-NEXT: xxswapd vs2, vs1
; PWR9LE-NEXT: addi r1, r1, 96
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -176(r1)
; PWR9BE-NEXT: std r0, 192(r1)
; PWR9BE-NEXT: stfd f28, 144(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f29, f4
; PWR9BE-NEXT: fmr f28, f3
; PWR9BE-NEXT: fmr f3, f5
; PWR9BE-NEXT: fmr f4, f6
; PWR9BE-NEXT: stfd f26, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f27, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f27, f1
; PWR9BE-NEXT: fmr f26, f2
; PWR9BE-NEXT: fmr f1, f28
; PWR9BE-NEXT: fmr f2, f29
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f1, f27
; PWR9BE-NEXT: fmr f2, f26
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: stfd f2, 120(r1)
; PWR9BE-NEXT: stfd f1, 112(r1)
; PWR9BE-NEXT: lxv vs1, 112(r1)
; PWR9BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: xxswapd vs2, vs1
; PWR9BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 144(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f27, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f26, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 176
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -96(r1)
; PWR10LE-NEXT: fmr f29, f4
; PWR10LE-NEXT: fmr f28, f3
; PWR10LE-NEXT: fmr f3, f5
; PWR10LE-NEXT: fmr f4, f6
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f27, f1
; PWR10LE-NEXT: fmr f26, f2
; PWR10LE-NEXT: fmr f1, f28
; PWR10LE-NEXT: fmr f2, f29
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f1, f27
; PWR10LE-NEXT: fmr f2, f26
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: stfd f2, 40(r1)
; PWR10LE-NEXT: stfd f1, 32(r1)
; PWR10LE-NEXT: lxv vs1, 32(r1)
; PWR10LE-NEXT: xxswapd vs2, vs1
; PWR10LE-NEXT: addi r1, r1, 96
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -176(r1)
; PWR10BE-NEXT: stfd f28, 144(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f29, f4
; PWR10BE-NEXT: fmr f28, f3
; PWR10BE-NEXT: fmr f3, f5
; PWR10BE-NEXT: fmr f4, f6
; PWR10BE-NEXT: stfd f26, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f27, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f27, f1
; PWR10BE-NEXT: fmr f26, f2
; PWR10BE-NEXT: fmr f1, f28
; PWR10BE-NEXT: fmr f2, f29
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f1, f27
; PWR10BE-NEXT: fmr f2, f26
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: stfd f2, 120(r1)
; PWR10BE-NEXT: stfd f1, 112(r1)
; PWR10BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 144(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f27, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f26, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lxv vs1, 112(r1)
; PWR10BE-NEXT: xxswapd vs2, vs1
; PWR10BE-NEXT: addi r1, r1, 176
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <4 x ppc_fp128> %a)
  ret ppc_fp128 %0
}

; ppc_fp128 fadd-reduction intrinsics used by the ppc_fp128 tests in this file.
; Each takes a scalar start value followed by the vector to reduce.
declare ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128, <2 x ppc_fp128>) #0
declare ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128, <4 x ppc_fp128>) #0

; Attribute group referenced as #0 by the definitions and declarations above.
attributes #0 = { nounwind }