; llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll
; Stefan Pintilie e9d12c2480 [PowerPC][NFC] Add a series of codegen tests for vector reductions.
; This patch only adds tests for PowerPC. The purpose of these tests
; is to track what code is generated for various vector reductions.
;
; Reviewed By: nemanjai, #powerpc
;
; Differential Revision: https://reviews.llvm.org/D113801
; 2021-11-19 15:03:01 -06:00
;
; 797 lines
; 24 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
;;
;; Vectors of type i8
;;
; Unsigned-min reduction over <2 x i8>: returns the result of
; llvm.vector.reduce.umin.v2i8 applied to %a.
; The autogenerated assertions expect one vspltb + vminub step, then a byte
; extract into r3 (vextubrx for the LE prefixes, vextublx for the BE prefixes).
define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a)
ret i8 %0
}
; Unsigned-min reduction over <4 x i8>: returns the result of
; llvm.vector.reduce.umin.v4i8 applied to %a.
; The autogenerated assertions expect two splat + vminub steps (vsplth, then
; vspltb) followed by a byte extract into r3.
define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a)
ret i8 %0
}
; Unsigned-min reduction over <8 x i8>: returns the result of
; llvm.vector.reduce.umin.v8i8 applied to %a.
; The autogenerated assertions expect three splat + vminub steps (xxspltw,
; vsplth, vspltb) followed by a byte extract into r3.
define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
ret i8 %0
}
; Unsigned-min reduction over <16 x i8>: returns the result of
; llvm.vector.reduce.umin.v16i8 applied to %a.
; The autogenerated assertions expect four shuffle + vminub steps (xxswapd,
; xxspltw, vsplth, vspltb) followed by a byte extract into r3.
define dso_local i8 @v16i8(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vspltb v3, v2, 14
; PWR9LE-NEXT: vminub v2, v2, v3
; PWR9LE-NEXT: vextubrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i8:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vspltb v3, v2, 1
; PWR9BE-NEXT: vminub v2, v2, v3
; PWR9BE-NEXT: vextublx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i8:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vspltb v3, v2, 14
; PWR10LE-NEXT: vminub v2, v2, v3
; PWR10LE-NEXT: vextubrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i8:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vspltb v3, v2, 1
; PWR10BE-NEXT: vminub v2, v2, v3
; PWR10BE-NEXT: vextublx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
ret i8 %0
}
; Declarations of the i8 umin reduction intrinsics called by the functions above.
declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) #0
declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) #0
declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) #0
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) #0
;;
;; Vectors of type i16
;;
; Unsigned-min reduction over <2 x i16>: returns the result of
; llvm.vector.reduce.umin.v2i16 applied to %a.
; The autogenerated assertions expect one vsplth + vminuh step, then a halfword
; extract into r3 (vextuhrx for the LE prefixes, vextuhlx for the BE prefixes).
define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a)
ret i16 %0
}
; Unsigned-min reduction over <4 x i16>: returns the result of
; llvm.vector.reduce.umin.v4i16 applied to %a.
; The autogenerated assertions expect two splat + vminuh steps (xxspltw, then
; vsplth) followed by a halfword extract into r3.
define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
ret i16 %0
}
; Unsigned-min reduction over <8 x i16>: returns the result of
; llvm.vector.reduce.umin.v8i16 applied to %a.
; The autogenerated assertions expect three shuffle + vminuh steps (xxswapd,
; xxspltw, vsplth) followed by a halfword extract into r3.
define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
ret i16 %0
}
; Unsigned-min reduction over <16 x i16>: returns the result of
; llvm.vector.reduce.umin.v16i16 applied to %a.
; The autogenerated assertions expect an initial vminuh of v2 with v3
; (presumably the two registers holding %a -- confirm against the ABI), then
; the same xxswapd / xxspltw / vsplth sequence and halfword extract as v8i16.
define dso_local i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i16:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vsplth v3, v2, 6
; PWR9LE-NEXT: vminuh v2, v2, v3
; PWR9LE-NEXT: vextuhrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i16:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vsplth v3, v2, 1
; PWR9BE-NEXT: vminuh v2, v2, v3
; PWR9BE-NEXT: vextuhlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i16:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vsplth v3, v2, 6
; PWR10LE-NEXT: vminuh v2, v2, v3
; PWR10LE-NEXT: vextuhrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i16:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vsplth v3, v2, 1
; PWR10BE-NEXT: vminuh v2, v2, v3
; PWR10BE-NEXT: vextuhlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a)
ret i16 %0
}
; Declarations of the i16 umin reduction intrinsics called by the functions above.
declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) #0
declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) #0
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) #0
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) #0
;;
;; Vectors of type i32
;;
; Unsigned-min reduction over <2 x i32>: returns the result of
; llvm.vector.reduce.umin.v2i32 applied to %a.
; The autogenerated assertions expect one xxspltw + vminuw step, then a word
; extract into r3 (vextuwrx for the LE prefixes, vextuwlx for the BE prefixes).
define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
ret i32 %0
}
; Unsigned-min reduction over <4 x i32>: returns the result of
; llvm.vector.reduce.umin.v4i32 applied to %a.
; The autogenerated assertions expect two shuffle + vminuw steps (xxswapd,
; then xxspltw) followed by a word extract into r3.
define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
ret i32 %0
}
; Unsigned-min reduction over <8 x i32>: returns the result of
; llvm.vector.reduce.umin.v8i32 applied to %a.
; The autogenerated assertions expect an initial vminuw of v2 with v3
; (presumably the two registers holding %a -- confirm against the ABI), then
; the same xxswapd / xxspltw sequence and word extract as v4i32.
define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
ret i32 %0
}
; Unsigned-min reduction over <16 x i32>: returns the result of
; llvm.vector.reduce.umin.v16i32 applied to %a.
; The autogenerated assertions expect three vminuw instructions that fold
; v2..v5 into v2 (presumably the four registers holding %a -- confirm against
; the ABI), then the same xxswapd / xxspltw sequence and word extract as v4i32.
define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminuw v3, v3, v5
; PWR9LE-NEXT: vminuw v2, v2, v4
; PWR9LE-NEXT: li r3, 0
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: xxspltw v3, v2, 2
; PWR9LE-NEXT: vminuw v2, v2, v3
; PWR9LE-NEXT: vextuwrx r3, r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminuw v3, v3, v5
; PWR9BE-NEXT: vminuw v2, v2, v4
; PWR9BE-NEXT: li r3, 0
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: xxspltw v3, v2, 1
; PWR9BE-NEXT: vminuw v2, v2, v3
; PWR9BE-NEXT: vextuwlx r3, r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminuw v3, v3, v5
; PWR10LE-NEXT: vminuw v2, v2, v4
; PWR10LE-NEXT: li r3, 0
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: xxspltw v3, v2, 2
; PWR10LE-NEXT: vminuw v2, v2, v3
; PWR10LE-NEXT: vextuwrx r3, r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminuw v3, v3, v5
; PWR10BE-NEXT: vminuw v2, v2, v4
; PWR10BE-NEXT: li r3, 0
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: xxspltw v3, v2, 1
; PWR10BE-NEXT: vminuw v2, v2, v3
; PWR10BE-NEXT: vextuwlx r3, r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %a)
ret i32 %0
}
; Declarations of the i32 umin reduction intrinsics called by the functions above.
declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) #0
;;
;; Vectors of type i64
;;
; Unsigned-min reduction over <2 x i64>: returns the result of
; llvm.vector.reduce.umin.v2i64 applied to %a.
; The autogenerated assertions expect one xxswapd + vminud step, then a move
; of the result into r3 (mfvsrld for the LE prefixes, mfvsrd for the BE ones).
define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
ret i64 %0
}
; Unsigned-min reduction over <4 x i64>: returns the result of
; llvm.vector.reduce.umin.v4i64 applied to %a.
; The autogenerated assertions expect an initial vminud of v2 with v3
; (presumably the two registers holding %a -- confirm against the ABI), then
; the same xxswapd + vminud step and move into r3 as v2i64.
define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a)
ret i64 %0
}
; Unsigned-min reduction over <8 x i64>: returns the result of
; llvm.vector.reduce.umin.v8i64 applied to %a.
; The autogenerated assertions expect three vminud instructions that fold
; v2..v5 into v2 (presumably the four registers holding %a -- confirm against
; the ABI), then the same xxswapd + vminud step and move into r3 as v2i64.
define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminud v2, v2, v4
; PWR9LE-NEXT: vminud v3, v3, v5
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminud v2, v2, v4
; PWR9BE-NEXT: vminud v3, v3, v5
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminud v2, v2, v4
; PWR10LE-NEXT: vminud v3, v3, v5
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminud v2, v2, v4
; PWR10BE-NEXT: vminud v3, v3, v5
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %a)
ret i64 %0
}
; Unsigned-min reduction over <16 x i64>: returns the result of
; llvm.vector.reduce.umin.v16i64 applied to %a.
; The autogenerated assertions expect seven vminud instructions that fold
; v2..v9 into v2 (presumably the eight registers holding %a -- confirm against
; the ABI), then the same xxswapd + vminud step and move into r3 as v2i64.
define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: vminud v3, v3, v7
; PWR9LE-NEXT: vminud v5, v5, v9
; PWR9LE-NEXT: vminud v2, v2, v6
; PWR9LE-NEXT: vminud v4, v4, v8
; PWR9LE-NEXT: vminud v2, v2, v4
; PWR9LE-NEXT: vminud v3, v3, v5
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: vminud v2, v2, v3
; PWR9LE-NEXT: mfvsrld r3, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16i64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: vminud v3, v3, v7
; PWR9BE-NEXT: vminud v5, v5, v9
; PWR9BE-NEXT: vminud v2, v2, v6
; PWR9BE-NEXT: vminud v4, v4, v8
; PWR9BE-NEXT: vminud v2, v2, v4
; PWR9BE-NEXT: vminud v3, v3, v5
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: vminud v2, v2, v3
; PWR9BE-NEXT: mfvsrd r3, v2
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16i64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: vminud v3, v3, v7
; PWR10LE-NEXT: vminud v5, v5, v9
; PWR10LE-NEXT: vminud v2, v2, v6
; PWR10LE-NEXT: vminud v4, v4, v8
; PWR10LE-NEXT: vminud v2, v2, v4
; PWR10LE-NEXT: vminud v3, v3, v5
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: vminud v2, v2, v3
; PWR10LE-NEXT: mfvsrld r3, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16i64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: vminud v3, v3, v7
; PWR10BE-NEXT: vminud v5, v5, v9
; PWR10BE-NEXT: vminud v2, v2, v6
; PWR10BE-NEXT: vminud v4, v4, v8
; PWR10BE-NEXT: vminud v2, v2, v4
; PWR10BE-NEXT: vminud v3, v3, v5
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: vminud v2, v2, v3
; PWR10BE-NEXT: mfvsrd r3, v2
; PWR10BE-NEXT: blr
entry:
%0 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %a)
ret i64 %0
}
; Declarations of the i64 umin reduction intrinsics called by the functions
; above, followed by attribute group #0 (nounwind), which the definitions and
; declarations above reference.
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) #0
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) #0
declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>) #0
declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>) #0
attributes #0 = { nounwind }