
The count of input operands affects pipeline forwarding in the scheduling model. The previous Power10 model definition placed some instructions into incorrect groups because it counted the wrong number of input operands. This patch updates the model so the input operand count is set correctly: irrelevant immediate operands are excluded, and the memory operands of load instructions are counted correctly.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D153842
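As a reading aid, the counting rule the message describes can be sketched as a tiny helper. This is an illustrative sketch only: the Operand/InstrDesc types and the countSchedulerInputs function below are invented for this example and are not the LLVM code touched by this patch, which changes the Power10 scheduling-model definitions themselves.

#include <cstddef>
#include <vector>

// Hypothetical operand model used only for this sketch.
struct Operand {
  bool isImmediate = false; // literal encoded in the instruction
  int numAddrRegs = 0;      // > 0 for a load's memory operand (e.g. base + index)
};

struct InstrDesc {
  std::vector<Operand> inputs; // use (input) operands only
};

// Inputs that can actually consume a forwarded register value: immediates are
// skipped, and a load's memory operand contributes one input per address register.
static std::size_t countSchedulerInputs(const InstrDesc &D) {
  std::size_t N = 0;
  for (const Operand &Op : D.inputs) {
    if (Op.isImmediate)
      continue;
    N += Op.numAddrRegs > 0 ? static_cast<std::size_t>(Op.numAddrRegs) : 1;
  }
  return N;
}

int main() {
  InstrDesc AddImm{{{false, 0}, {true, 0}}}; // reg + imm              -> 1 input
  InstrDesc LoadRR{{{false, 2}}};            // reg + reg load address -> 2 inputs
  return countSchedulerInputs(AddImm) == 1 &&
                 countSchedulerInputs(LoadRR) == 2
             ? 0
             : 1;
}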
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE

;;
;; Vectors of i8
;;
define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a)
ret i8 %0
}

define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a)
ret i8 %0
}

define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
ret i8 %0
}

define dso_local signext i8 @v16i8_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8_sign:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: extsb r3, r3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8_sign:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: extsb r3, r3
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8_sign:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: extsb r3, r3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8_sign:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: extsb r3, r3
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
ret i8 %0
}

define dso_local zeroext i8 @v16i8_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8_zero:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: clrldi r3, r3, 56
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8_zero:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: clrldi r3, r3, 56
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8_zero:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: clrldi r3, r3, 56
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8_zero:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: clrldi r3, r3, 56
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
ret i8 %0
}

define dso_local i8 @v32i8(<32 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vaddubm v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vaddubm v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vaddubm v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vaddubm v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
ret i8 %0
}

declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) #0
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) #0
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) #0
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #0
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) #0

;;
;; Vectors of i16
;;
define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a)
ret i16 %0
}

define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
ret i16 %0
}

define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
ret i16 %0
}

define dso_local zeroext i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: clrldi r3, r3, 48
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: clrldi r3, r3, 48
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: clrldi r3, r3, 48
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: clrldi r3, r3, 48
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
ret i16 %0
}

define dso_local signext i16 @v16i8tov16i16_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i16_sign:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vmrghb v3, v2, v2
; PWR9LE-NEXT: vspltish v4, 8
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vmrglb v2, v2, v2
; PWR9LE-NEXT: vslh v3, v3, v4
; PWR9LE-NEXT: vslh v2, v2, v4
; PWR9LE-NEXT: vsrah v3, v3, v4
; PWR9LE-NEXT: vsrah v2, v2, v4
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: extsh r3, r3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i16_sign:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vmrglb v3, v2, v2
; PWR9BE-NEXT: vspltish v4, 8
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vmrghb v2, v2, v2
; PWR9BE-NEXT: vslh v3, v3, v4
; PWR9BE-NEXT: vslh v2, v2, v4
; PWR9BE-NEXT: vsrah v3, v3, v4
; PWR9BE-NEXT: vsrah v2, v2, v4
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: extsh r3, r3
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i16_sign:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vmrghb v3, v2, v2
; PWR10LE-NEXT: xxspltiw v4, 524296
; PWR10LE-NEXT: vmrglb v2, v2, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vslh v3, v3, v4
; PWR10LE-NEXT: vslh v2, v2, v4
; PWR10LE-NEXT: vsrah v3, v3, v4
; PWR10LE-NEXT: vsrah v2, v2, v4
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: extsh r3, r3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i16_sign:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vmrglb v3, v2, v2
; PWR10BE-NEXT: xxspltiw v4, 524296
; PWR10BE-NEXT: vmrghb v2, v2, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vslh v3, v3, v4
; PWR10BE-NEXT: vslh v2, v2, v4
; PWR10BE-NEXT: vsrah v3, v3, v4
; PWR10BE-NEXT: vsrah v2, v2, v4
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: extsh r3, r3
; PWR10BE-NEXT: blr
entry:
%0 = sext <16 x i8> %a to <16 x i16>
%1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
ret i16 %1
}

define dso_local zeroext i16 @v16i8tov16i16_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i16_zero:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxlxor v3, v3, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vmrghb v4, v3, v2
; PWR9LE-NEXT: vmrglb v2, v3, v2
; PWR9LE-NEXT: vadduhm v2, v2, v4
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vadduhm v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: clrldi r3, r3, 48
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i16_zero:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxlxor v3, v3, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vmrglb v4, v3, v2
; PWR9BE-NEXT: vmrghb v2, v3, v2
; PWR9BE-NEXT: vadduhm v2, v2, v4
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vadduhm v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: clrldi r3, r3, 48
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i16_zero:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxlxor v3, v3, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vmrghb v4, v3, v2
; PWR10LE-NEXT: vmrglb v2, v3, v2
; PWR10LE-NEXT: vadduhm v2, v2, v4
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vadduhm v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: clrldi r3, r3, 48
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i16_zero:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxlxor v3, v3, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vmrglb v4, v3, v2
; PWR10BE-NEXT: vmrghb v2, v3, v2
; PWR10BE-NEXT: vadduhm v2, v2, v4
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vadduhm v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: clrldi r3, r3, 48
; PWR10BE-NEXT: blr
entry:
%0 = zext <16 x i8> %a to <16 x i16>
%1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
ret i16 %1
}

declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) #0
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #0
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0

;;
;; Vectors of i32
;;
define dso_local zeroext i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
ret i32 %0
}

define dso_local zeroext i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
ret i32 %0
}

define dso_local zeroext i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
ret i32 %0
}

define dso_local zeroext i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vadduwm v3, v3, v5
; PWR9LE-NEXT: vadduwm v2, v2, v4
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vadduwm v3, v3, v5
; PWR9BE-NEXT: vadduwm v2, v2, v4
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vadduwm v3, v3, v5
; PWR10LE-NEXT: vadduwm v2, v2, v4
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vadduwm v3, v3, v5
; PWR10BE-NEXT: vadduwm v2, v2, v4
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
ret i32 %0
}

define dso_local zeroext i32 @v32i32(<32 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vadduwm v4, v4, v8
; PWR9LE-NEXT: vadduwm v2, v2, v6
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vadduwm v5, v5, v9
; PWR9LE-NEXT: vadduwm v3, v3, v7
; PWR9LE-NEXT: vadduwm v3, v3, v5
; PWR9LE-NEXT: vadduwm v2, v2, v4
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vadduwm v4, v4, v8
; PWR9BE-NEXT: vadduwm v2, v2, v6
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vadduwm v5, v5, v9
; PWR9BE-NEXT: vadduwm v3, v3, v7
; PWR9BE-NEXT: vadduwm v3, v3, v5
; PWR9BE-NEXT: vadduwm v2, v2, v4
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vadduwm v4, v4, v8
; PWR10LE-NEXT: vadduwm v2, v2, v6
; PWR10LE-NEXT: vadduwm v5, v5, v9
; PWR10LE-NEXT: vadduwm v3, v3, v7
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vadduwm v3, v3, v5
; PWR10LE-NEXT: vadduwm v2, v2, v4
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vadduwm v4, v4, v8
; PWR10BE-NEXT: vadduwm v2, v2, v6
; PWR10BE-NEXT: vadduwm v5, v5, v9
; PWR10BE-NEXT: vadduwm v3, v3, v7
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vadduwm v3, v3, v5
; PWR10BE-NEXT: vadduwm v2, v2, v4
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a)
ret i32 %0
}

define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i32_sign:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI17_0@toc@l
; PWR9LE-NEXT: lxv v3, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI17_1@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI17_1@toc@l
; PWR9LE-NEXT: lxv v4, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI17_2@toc@ha
; PWR9LE-NEXT: vperm v3, v2, v2, v3
; PWR9LE-NEXT: addi r3, r3, .LCPI17_2@toc@l
; PWR9LE-NEXT: lxv v5, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI17_3@toc@ha
; PWR9LE-NEXT: vextsb2w v3, v3
; PWR9LE-NEXT: vperm v4, v2, v2, v4
; PWR9LE-NEXT: addi r3, r3, .LCPI17_3@toc@l
; PWR9LE-NEXT: lxv v0, 0(r3)
; PWR9LE-NEXT: vextsb2w v4, v4
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vperm v5, v2, v2, v5
; PWR9LE-NEXT: vadduwm v3, v4, v3
; PWR9LE-NEXT: vextsb2w v5, v5
; PWR9LE-NEXT: vperm v2, v2, v2, v0
; PWR9LE-NEXT: vextsb2w v2, v2
; PWR9LE-NEXT: vadduwm v2, v2, v5
; PWR9LE-NEXT: vadduwm v2, v3, v2
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: extsw r3, r3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i32_sign:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI17_0@toc@l
; PWR9BE-NEXT: lxv v3, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI17_1@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI17_1@toc@l
; PWR9BE-NEXT: lxv v4, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha
; PWR9BE-NEXT: vperm v3, v2, v2, v3
; PWR9BE-NEXT: addi r3, r3, .LCPI17_2@toc@l
; PWR9BE-NEXT: lxv v5, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha
; PWR9BE-NEXT: vextsb2w v3, v3
; PWR9BE-NEXT: vperm v4, v2, v2, v4
; PWR9BE-NEXT: addi r3, r3, .LCPI17_3@toc@l
; PWR9BE-NEXT: lxv v0, 0(r3)
; PWR9BE-NEXT: vextsb2w v4, v4
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vperm v5, v2, v2, v5
; PWR9BE-NEXT: vadduwm v3, v4, v3
; PWR9BE-NEXT: vextsb2w v5, v5
; PWR9BE-NEXT: vperm v2, v2, v2, v0
; PWR9BE-NEXT: vextsb2w v2, v2
; PWR9BE-NEXT: vadduwm v2, v2, v5
; PWR9BE-NEXT: vadduwm v2, v3, v2
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: extsw r3, r3
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i32_sign:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: plxv v3, .LCPI17_0@PCREL(0), 1
; PWR10LE-NEXT: plxv v4, .LCPI17_1@PCREL(0), 1
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vperm v3, v2, v2, v3
; PWR10LE-NEXT: plxv v5, .LCPI17_2@PCREL(0), 1
; PWR10LE-NEXT: plxv v0, .LCPI17_3@PCREL(0), 1
; PWR10LE-NEXT: vperm v4, v2, v2, v4
; PWR10LE-NEXT: vperm v5, v2, v2, v5
; PWR10LE-NEXT: vperm v2, v2, v2, v0
; PWR10LE-NEXT: vextsb2w v3, v3
; PWR10LE-NEXT: vextsb2w v4, v4
; PWR10LE-NEXT: vextsb2w v5, v5
; PWR10LE-NEXT: vextsb2w v2, v2
; PWR10LE-NEXT: vadduwm v2, v2, v5
; PWR10LE-NEXT: vadduwm v3, v4, v3
; PWR10LE-NEXT: vadduwm v2, v3, v2
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: extsw r3, r3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i32_sign:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI17_0@toc@l
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_1@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI17_1@toc@l
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha
; PWR10BE-NEXT: vperm v3, v2, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l
; PWR10BE-NEXT: vextsb2w v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha
; PWR10BE-NEXT: vperm v4, v2, v2, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l
; PWR10BE-NEXT: vextsb2w v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: vadduwm v3, v4, v3
; PWR10BE-NEXT: vextsb2w v5, v5
; PWR10BE-NEXT: vperm v2, v2, v2, v0
; PWR10BE-NEXT: vextsb2w v2, v2
; PWR10BE-NEXT: vadduwm v2, v2, v5
; PWR10BE-NEXT: vadduwm v2, v3, v2
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: extsw r3, r3
; PWR10BE-NEXT: blr
entry:
%0 = sext <16 x i8> %a to <16 x i32>
%1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
ret i32 %1
}

define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i32_zero:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; PWR9LE-NEXT: xxlxor v4, v4, v4
; PWR9LE-NEXT: addi r3, r3, .LCPI18_0@toc@l
; PWR9LE-NEXT: lxv v3, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI18_1@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI18_1@toc@l
; PWR9LE-NEXT: lxv v5, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI18_2@toc@ha
; PWR9LE-NEXT: vperm v3, v4, v2, v3
; PWR9LE-NEXT: addi r3, r3, .LCPI18_2@toc@l
; PWR9LE-NEXT: lxv v0, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI18_3@toc@ha
; PWR9LE-NEXT: vperm v5, v4, v2, v5
; PWR9LE-NEXT: addi r3, r3, .LCPI18_3@toc@l
; PWR9LE-NEXT: lxv v1, 0(r3)
; PWR9LE-NEXT: vadduwm v3, v5, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vperm v0, v4, v2, v0
; PWR9LE-NEXT: vperm v2, v4, v2, v1
; PWR9LE-NEXT: vadduwm v2, v2, v0
; PWR9LE-NEXT: vadduwm v2, v3, v2
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vadduwm v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i32_zero:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; PWR9BE-NEXT: xxlxor v4, v4, v4
; PWR9BE-NEXT: addi r3, r3, .LCPI18_0@toc@l
; PWR9BE-NEXT: lxv v3, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI18_1@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI18_1@toc@l
; PWR9BE-NEXT: lxv v5, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha
; PWR9BE-NEXT: vperm v3, v4, v2, v3
; PWR9BE-NEXT: addi r3, r3, .LCPI18_2@toc@l
; PWR9BE-NEXT: lxv v0, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha
; PWR9BE-NEXT: vperm v5, v4, v2, v5
; PWR9BE-NEXT: addi r3, r3, .LCPI18_3@toc@l
; PWR9BE-NEXT: lxv v1, 0(r3)
; PWR9BE-NEXT: vadduwm v3, v5, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vperm v0, v4, v2, v0
; PWR9BE-NEXT: vperm v2, v4, v2, v1
; PWR9BE-NEXT: vadduwm v2, v2, v0
; PWR9BE-NEXT: vadduwm v2, v3, v2
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vadduwm v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i32_zero:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: plxv v3, .LCPI18_0@PCREL(0), 1
; PWR10LE-NEXT: plxv v5, .LCPI18_1@PCREL(0), 1
; PWR10LE-NEXT: xxlxor v4, v4, v4
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vperm v3, v4, v2, v3
; PWR10LE-NEXT: plxv v0, .LCPI18_2@PCREL(0), 1
; PWR10LE-NEXT: plxv v1, .LCPI18_3@PCREL(0), 1
; PWR10LE-NEXT: vperm v5, v4, v2, v5
; PWR10LE-NEXT: vperm v0, v4, v2, v0
; PWR10LE-NEXT: vperm v2, v4, v2, v1
; PWR10LE-NEXT: vadduwm v2, v2, v0
; PWR10LE-NEXT: vadduwm v3, v5, v3
; PWR10LE-NEXT: vadduwm v2, v3, v2
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vadduwm v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i32_zero:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; PWR10BE-NEXT: xxlxor v4, v4, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI18_0@toc@l
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI18_1@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI18_1@toc@l
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha
; PWR10BE-NEXT: vperm v3, v4, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha
; PWR10BE-NEXT: vperm v5, v4, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l
; PWR10BE-NEXT: vadduwm v3, v5, v3
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vperm v0, v4, v2, v0
; PWR10BE-NEXT: vperm v2, v4, v2, v1
; PWR10BE-NEXT: vadduwm v2, v2, v0
; PWR10BE-NEXT: vadduwm v2, v3, v2
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vadduwm v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = zext <16 x i8> %a to <16 x i32>
%1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
ret i32 %1
}

declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #0
declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) #0

;;
;; Vectors of i64
;;
define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
ret i64 %0
}

define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
ret i64 %0
}

define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vaddudm v3, v3, v5
; PWR9LE-NEXT: vaddudm v2, v2, v4
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vaddudm v3, v3, v5
; PWR9BE-NEXT: vaddudm v2, v2, v4
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vaddudm v3, v3, v5
; PWR10LE-NEXT: vaddudm v2, v2, v4
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vaddudm v3, v3, v5
; PWR10BE-NEXT: vaddudm v2, v2, v4
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
ret i64 %0
}

define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vaddudm v4, v4, v8
; PWR9LE-NEXT: vaddudm v2, v2, v6
; PWR9LE-NEXT: vaddudm v5, v5, v9
; PWR9LE-NEXT: vaddudm v3, v3, v7
; PWR9LE-NEXT: vaddudm v3, v3, v5
; PWR9LE-NEXT: vaddudm v2, v2, v4
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vaddudm v4, v4, v8
; PWR9BE-NEXT: vaddudm v2, v2, v6
; PWR9BE-NEXT: vaddudm v5, v5, v9
; PWR9BE-NEXT: vaddudm v3, v3, v7
; PWR9BE-NEXT: vaddudm v3, v3, v5
; PWR9BE-NEXT: vaddudm v2, v2, v4
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vaddudm v4, v4, v8
; PWR10LE-NEXT: vaddudm v5, v5, v9
; PWR10LE-NEXT: vaddudm v3, v3, v7
; PWR10LE-NEXT: vaddudm v3, v3, v5
; PWR10LE-NEXT: vaddudm v2, v2, v6
; PWR10LE-NEXT: vaddudm v2, v2, v4
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vaddudm v4, v4, v8
; PWR10BE-NEXT: vaddudm v5, v5, v9
; PWR10BE-NEXT: vaddudm v3, v3, v7
; PWR10BE-NEXT: vaddudm v3, v3, v5
; PWR10BE-NEXT: vaddudm v2, v2, v6
; PWR10BE-NEXT: vaddudm v2, v2, v4
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a)
ret i64 %0
}

define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i64_sign:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: addis r3, r2, .LCPI23_0@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI23_0@toc@l
; PWR9LE-NEXT: lxv v3, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_1@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI23_1@toc@l
; PWR9LE-NEXT: lxv v4, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_2@toc@ha
; PWR9LE-NEXT: vperm v3, v2, v2, v3
; PWR9LE-NEXT: addi r3, r3, .LCPI23_2@toc@l
; PWR9LE-NEXT: lxv v5, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_3@toc@ha
; PWR9LE-NEXT: vextsb2d v3, v3
; PWR9LE-NEXT: vperm v4, v2, v2, v4
; PWR9LE-NEXT: addi r3, r3, .LCPI23_3@toc@l
; PWR9LE-NEXT: lxv v0, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_4@toc@ha
; PWR9LE-NEXT: vextsb2d v4, v4
; PWR9LE-NEXT: vperm v5, v2, v2, v5
; PWR9LE-NEXT: addi r3, r3, .LCPI23_4@toc@l
; PWR9LE-NEXT: vaddudm v3, v4, v3
; PWR9LE-NEXT: lxv v1, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_5@toc@ha
; PWR9LE-NEXT: vextsb2d v5, v5
; PWR9LE-NEXT: vperm v0, v2, v2, v0
; PWR9LE-NEXT: addi r3, r3, .LCPI23_5@toc@l
; PWR9LE-NEXT: lxv v6, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_6@toc@ha
; PWR9LE-NEXT: vperm v1, v2, v2, v1
; PWR9LE-NEXT: vextsb2d v0, v0
; PWR9LE-NEXT: addi r3, r3, .LCPI23_6@toc@l
; PWR9LE-NEXT: vaddudm v5, v0, v5
; PWR9LE-NEXT: lxv v7, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI23_7@toc@ha
; PWR9LE-NEXT: vperm v6, v2, v2, v6
; PWR9LE-NEXT: vextsb2d v1, v1
; PWR9LE-NEXT: vaddudm v3, v3, v5
; PWR9LE-NEXT: addi r3, r3, .LCPI23_7@toc@l
; PWR9LE-NEXT: lxv v8, 0(r3)
; PWR9LE-NEXT: vextsb2d v6, v6
; PWR9LE-NEXT: vperm v7, v2, v2, v7
; PWR9LE-NEXT: vaddudm v1, v6, v1
; PWR9LE-NEXT: vextsb2d v7, v7
; PWR9LE-NEXT: vperm v2, v2, v2, v8
; PWR9LE-NEXT: vextsb2d v2, v2
; PWR9LE-NEXT: vaddudm v2, v2, v7
; PWR9LE-NEXT: vaddudm v2, v1, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i64_sign:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI23_0@toc@l
; PWR9BE-NEXT: lxv v3, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI23_1@toc@l
; PWR9BE-NEXT: lxv v4, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha
; PWR9BE-NEXT: vperm v3, v2, v2, v3
; PWR9BE-NEXT: addi r3, r3, .LCPI23_2@toc@l
; PWR9BE-NEXT: lxv v5, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha
; PWR9BE-NEXT: vextsb2d v3, v3
; PWR9BE-NEXT: vperm v4, v2, v2, v4
; PWR9BE-NEXT: addi r3, r3, .LCPI23_3@toc@l
; PWR9BE-NEXT: lxv v0, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha
; PWR9BE-NEXT: vextsb2d v4, v4
; PWR9BE-NEXT: vperm v5, v2, v2, v5
; PWR9BE-NEXT: addi r3, r3, .LCPI23_4@toc@l
; PWR9BE-NEXT: vaddudm v3, v4, v3
; PWR9BE-NEXT: lxv v1, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha
; PWR9BE-NEXT: vextsb2d v5, v5
; PWR9BE-NEXT: vperm v0, v2, v2, v0
; PWR9BE-NEXT: addi r3, r3, .LCPI23_5@toc@l
; PWR9BE-NEXT: lxv v6, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha
; PWR9BE-NEXT: vperm v1, v2, v2, v1
; PWR9BE-NEXT: vextsb2d v0, v0
; PWR9BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
; PWR9BE-NEXT: vaddudm v5, v0, v5
; PWR9BE-NEXT: lxv v7, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha
; PWR9BE-NEXT: vperm v6, v2, v2, v6
; PWR9BE-NEXT: vextsb2d v1, v1
; PWR9BE-NEXT: vaddudm v3, v3, v5
; PWR9BE-NEXT: addi r3, r3, .LCPI23_7@toc@l
; PWR9BE-NEXT: lxv v8, 0(r3)
; PWR9BE-NEXT: vextsb2d v6, v6
; PWR9BE-NEXT: vperm v7, v2, v2, v7
; PWR9BE-NEXT: vaddudm v1, v6, v1
; PWR9BE-NEXT: vextsb2d v7, v7
; PWR9BE-NEXT: vperm v2, v2, v2, v8
; PWR9BE-NEXT: vextsb2d v2, v2
; PWR9BE-NEXT: vaddudm v2, v2, v7
; PWR9BE-NEXT: vaddudm v2, v1, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i64_sign:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: plxv v3, .LCPI23_0@PCREL(0), 1
; PWR10LE-NEXT: plxv v4, .LCPI23_1@PCREL(0), 1
; PWR10LE-NEXT: vperm v3, v2, v2, v3
; PWR10LE-NEXT: plxv v5, .LCPI23_2@PCREL(0), 1
; PWR10LE-NEXT: plxv v0, .LCPI23_3@PCREL(0), 1
; PWR10LE-NEXT: plxv v1, .LCPI23_4@PCREL(0), 1
; PWR10LE-NEXT: plxv v6, .LCPI23_5@PCREL(0), 1
; PWR10LE-NEXT: plxv v7, .LCPI23_6@PCREL(0), 1
; PWR10LE-NEXT: plxv v8, .LCPI23_7@PCREL(0), 1
; PWR10LE-NEXT: vperm v4, v2, v2, v4
; PWR10LE-NEXT: vperm v5, v2, v2, v5
; PWR10LE-NEXT: vperm v0, v2, v2, v0
; PWR10LE-NEXT: vperm v1, v2, v2, v1
; PWR10LE-NEXT: vperm v6, v2, v2, v6
; PWR10LE-NEXT: vperm v7, v2, v2, v7
; PWR10LE-NEXT: vperm v2, v2, v2, v8
; PWR10LE-NEXT: vextsb2d v5, v5
; PWR10LE-NEXT: vextsb2d v0, v0
; PWR10LE-NEXT: vextsb2d v7, v7
; PWR10LE-NEXT: vextsb2d v2, v2
; PWR10LE-NEXT: vextsb2d v3, v3
; PWR10LE-NEXT: vextsb2d v4, v4
; PWR10LE-NEXT: vextsb2d v1, v1
; PWR10LE-NEXT: vextsb2d v6, v6
; PWR10LE-NEXT: vaddudm v2, v2, v7
; PWR10LE-NEXT: vaddudm v5, v0, v5
; PWR10LE-NEXT: vaddudm v3, v4, v3
; PWR10LE-NEXT: vaddudm v3, v3, v5
; PWR10LE-NEXT: vaddudm v4, v6, v1
; PWR10LE-NEXT: vaddudm v2, v4, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i64_sign:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha
; PWR10BE-NEXT: vperm v3, v2, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l
; PWR10BE-NEXT: vextsb2d v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha
; PWR10BE-NEXT: vperm v4, v2, v2, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l
; PWR10BE-NEXT: vextsb2d v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l
; PWR10BE-NEXT: vextsb2d v5, v5
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha
; PWR10BE-NEXT: vperm v0, v2, v2, v0
; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l
; PWR10BE-NEXT: vextsb2d v0, v0
; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha
; PWR10BE-NEXT: vperm v1, v2, v2, v1
; PWR10BE-NEXT: vaddudm v5, v0, v5
; PWR10BE-NEXT: vaddudm v3, v4, v3
; PWR10BE-NEXT: vaddudm v3, v3, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
; PWR10BE-NEXT: vextsb2d v1, v1
; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha
; PWR10BE-NEXT: vperm v6, v2, v2, v6
; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l
; PWR10BE-NEXT: vextsb2d v6, v6
; PWR10BE-NEXT: lxv v8, 0(r3)
; PWR10BE-NEXT: vperm v7, v2, v2, v7
; PWR10BE-NEXT: vextsb2d v7, v7
; PWR10BE-NEXT: vperm v2, v2, v2, v8
; PWR10BE-NEXT: vextsb2d v2, v2
; PWR10BE-NEXT: vaddudm v2, v2, v7
; PWR10BE-NEXT: vaddudm v4, v6, v1
; PWR10BE-NEXT: vaddudm v2, v4, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = sext <16 x i8> %a to <16 x i64>
%1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
ret i64 %1
}

define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i64_zero:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: addis r3, r2, .LCPI24_0@toc@ha
; PWR9LE-NEXT: xxlxor v4, v4, v4
; PWR9LE-NEXT: addi r3, r3, .LCPI24_0@toc@l
; PWR9LE-NEXT: lxv v3, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_1@toc@ha
; PWR9LE-NEXT: addi r3, r3, .LCPI24_1@toc@l
; PWR9LE-NEXT: lxv v5, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_2@toc@ha
; PWR9LE-NEXT: vperm v3, v4, v2, v3
; PWR9LE-NEXT: addi r3, r3, .LCPI24_2@toc@l
; PWR9LE-NEXT: lxv v0, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_3@toc@ha
; PWR9LE-NEXT: vperm v5, v4, v2, v5
; PWR9LE-NEXT: addi r3, r3, .LCPI24_3@toc@l
; PWR9LE-NEXT: lxv v1, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_4@toc@ha
; PWR9LE-NEXT: vaddudm v3, v5, v3
; PWR9LE-NEXT: vperm v0, v4, v2, v0
; PWR9LE-NEXT: addi r3, r3, .LCPI24_4@toc@l
; PWR9LE-NEXT: lxv v6, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_5@toc@ha
; PWR9LE-NEXT: vperm v1, v4, v2, v1
; PWR9LE-NEXT: addi r3, r3, .LCPI24_5@toc@l
; PWR9LE-NEXT: lxv v7, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_6@toc@ha
; PWR9LE-NEXT: vaddudm v0, v1, v0
; PWR9LE-NEXT: vperm v6, v4, v2, v6
; PWR9LE-NEXT: addi r3, r3, .LCPI24_6@toc@l
; PWR9LE-NEXT: lxv v8, 0(r3)
; PWR9LE-NEXT: addis r3, r2, .LCPI24_7@toc@ha
; PWR9LE-NEXT: vaddudm v3, v3, v0
; PWR9LE-NEXT: vperm v7, v4, v2, v7
; PWR9LE-NEXT: addi r3, r3, .LCPI24_7@toc@l
; PWR9LE-NEXT: lxv v9, 0(r3)
; PWR9LE-NEXT: vperm v8, v4, v2, v8
; PWR9LE-NEXT: vperm v2, v4, v2, v9
; PWR9LE-NEXT: vaddudm v4, v7, v6
; PWR9LE-NEXT: vaddudm v2, v2, v8
; PWR9LE-NEXT: vaddudm v2, v4, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vaddudm v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8tov16i64_zero:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha
; PWR9BE-NEXT: xxlxor v4, v4, v4
; PWR9BE-NEXT: addi r3, r3, .LCPI24_0@toc@l
; PWR9BE-NEXT: lxv v3, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_1@toc@ha
; PWR9BE-NEXT: addi r3, r3, .LCPI24_1@toc@l
; PWR9BE-NEXT: lxv v5, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha
; PWR9BE-NEXT: vperm v3, v4, v2, v3
; PWR9BE-NEXT: addi r3, r3, .LCPI24_2@toc@l
; PWR9BE-NEXT: lxv v0, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha
; PWR9BE-NEXT: vperm v5, v4, v2, v5
; PWR9BE-NEXT: addi r3, r3, .LCPI24_3@toc@l
; PWR9BE-NEXT: lxv v1, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha
; PWR9BE-NEXT: vaddudm v3, v5, v3
; PWR9BE-NEXT: vperm v0, v4, v2, v0
; PWR9BE-NEXT: addi r3, r3, .LCPI24_4@toc@l
; PWR9BE-NEXT: lxv v6, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha
; PWR9BE-NEXT: vperm v1, v4, v2, v1
; PWR9BE-NEXT: addi r3, r3, .LCPI24_5@toc@l
; PWR9BE-NEXT: lxv v7, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha
; PWR9BE-NEXT: vaddudm v0, v1, v0
; PWR9BE-NEXT: vperm v6, v4, v2, v6
; PWR9BE-NEXT: addi r3, r3, .LCPI24_6@toc@l
; PWR9BE-NEXT: lxv v8, 0(r3)
; PWR9BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha
; PWR9BE-NEXT: vaddudm v3, v3, v0
; PWR9BE-NEXT: vperm v7, v4, v2, v7
; PWR9BE-NEXT: addi r3, r3, .LCPI24_7@toc@l
; PWR9BE-NEXT: lxv v9, 0(r3)
; PWR9BE-NEXT: vperm v8, v4, v2, v8
; PWR9BE-NEXT: vperm v2, v4, v2, v9
; PWR9BE-NEXT: vaddudm v4, v7, v6
; PWR9BE-NEXT: vaddudm v2, v2, v8
; PWR9BE-NEXT: vaddudm v2, v4, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vaddudm v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8tov16i64_zero:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: plxv v3, .LCPI24_0@PCREL(0), 1
; PWR10LE-NEXT: plxv v5, .LCPI24_1@PCREL(0), 1
; PWR10LE-NEXT: xxlxor v4, v4, v4
; PWR10LE-NEXT: vperm v3, v4, v2, v3
; PWR10LE-NEXT: plxv v0, .LCPI24_2@PCREL(0), 1
; PWR10LE-NEXT: plxv v1, .LCPI24_3@PCREL(0), 1
; PWR10LE-NEXT: plxv v6, .LCPI24_4@PCREL(0), 1
; PWR10LE-NEXT: plxv v7, .LCPI24_5@PCREL(0), 1
; PWR10LE-NEXT: plxv v8, .LCPI24_6@PCREL(0), 1
; PWR10LE-NEXT: plxv v9, .LCPI24_7@PCREL(0), 1
; PWR10LE-NEXT: vperm v5, v4, v2, v5
; PWR10LE-NEXT: vperm v0, v4, v2, v0
; PWR10LE-NEXT: vperm v1, v4, v2, v1
; PWR10LE-NEXT: vperm v6, v4, v2, v6
; PWR10LE-NEXT: vperm v7, v4, v2, v7
; PWR10LE-NEXT: vperm v8, v4, v2, v8
; PWR10LE-NEXT: vperm v2, v4, v2, v9
; PWR10LE-NEXT: vaddudm v2, v2, v8
; PWR10LE-NEXT: vaddudm v4, v1, v0
; PWR10LE-NEXT: vaddudm v3, v5, v3
; PWR10LE-NEXT: vaddudm v3, v3, v4
; PWR10LE-NEXT: vaddudm v4, v7, v6
; PWR10LE-NEXT: vaddudm v2, v4, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vaddudm v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8tov16i64_zero:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha
; PWR10BE-NEXT: xxlxor v4, v4, v4
; PWR10BE-NEXT: addi r3, r3, .LCPI24_0@toc@l
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_1@toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI24_1@toc@l
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha
; PWR10BE-NEXT: vperm v3, v4, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha
; PWR10BE-NEXT: vperm v5, v4, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha
; PWR10BE-NEXT: vperm v0, v4, v2, v0
; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l
; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha
; PWR10BE-NEXT: vperm v1, v4, v2, v1
; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l
; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha
; PWR10BE-NEXT: vperm v6, v4, v2, v6
; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l
; PWR10BE-NEXT: lxv v8, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha
; PWR10BE-NEXT: vperm v7, v4, v2, v7
; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l
; PWR10BE-NEXT: lxv v9, 0(r3)
; PWR10BE-NEXT: vperm v8, v4, v2, v8
; PWR10BE-NEXT: vperm v2, v4, v2, v9
; PWR10BE-NEXT: vaddudm v4, v1, v0
; PWR10BE-NEXT: vaddudm v3, v5, v3
; PWR10BE-NEXT: vaddudm v3, v3, v4
; PWR10BE-NEXT: vaddudm v2, v2, v8
; PWR10BE-NEXT: vaddudm v4, v7, v6
; PWR10BE-NEXT: vaddudm v2, v4, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = zext <16 x i8> %a to <16 x i64>
%1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
ret i64 %1
}

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #0
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #0
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #0
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) #0

attributes #0 = { nounwind }