llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-maximumnum-minimumnum.ll
Florian Hahn ec1016f7ef
[IVDescriptors] Support reductions with minimumnum/maximumnum. (#137335)
Add a new reduction recurrence kind for reductions with
minimumnum/maximumnum. Such reductions can be vectorized without
nsz/nnans, same as reductions with maximum/minimum intrinsics.

Note that a new reduction kind is needed to make sure partial reductions
are also combined with minimumnum/maximumnum.

Note that the final reduction to a scalar value is performed with
vector.reduce.fmin/fmax. This should be fine, as the partial reductions
with maximumnum/minimumnum silence any sNaNs.

In-loop reductions and reductions in SLP are not supported yet, as there's
no reduction version of maximumnum/minimumnum and fmax may be incorrect.

PR: https://github.com/llvm/llvm-project/pull/137335
2025-04-28 11:16:36 +01:00

63 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-macosx -S %s | FileCheck %s
; Scalar floating-point intrinsics called by the reduction chains in the test
; functions of this file.
declare float @llvm.maximumnum.f32(float, float)
declare float @llvm.minimumnum.f32(float, float)
; TODO: Need reduction version of maximumnum/minimumnum. Until then the
; assertions in the functions below expect the scalar call chains to be left
; unvectorized.
; Chained llvm.maximumnum.f32 reduction over four consecutive floats starting
; at %p: m3 = maximumnum(t3, maximumnum(t2, maximumnum(t1, t0))).
; The autogenerated assertions expect the GEP/load/call sequence to come out
; of the SLP vectorizer in the same scalar form as the input, i.e. no vector
; reduction is formed for this chain.
define float @reduction_v4f32_maximumnum(ptr %p) {
; CHECK-LABEL: define float @reduction_v4f32_maximumnum
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT: [[M1:%.*]] = tail call float @llvm.maximumnum.f32(float [[T1]], float [[T0]])
; CHECK-NEXT: [[M2:%.*]] = tail call float @llvm.maximumnum.f32(float [[T2]], float [[M1]])
; CHECK-NEXT: [[M3:%.*]] = tail call float @llvm.maximumnum.f32(float [[T3]], float [[M2]])
; CHECK-NEXT: ret float [[M3]]
;
; Addresses of elements 1..3; element 0 is read directly through %p.
%g1 = getelementptr inbounds float, ptr %p, i64 1
%g2 = getelementptr inbounds float, ptr %p, i64 2
%g3 = getelementptr inbounds float, ptr %p, i64 3
; Load the four adjacent float elements.
%t0 = load float, ptr %p, align 4
%t1 = load float, ptr %g1, align 4
%t2 = load float, ptr %g2, align 4
%t3 = load float, ptr %g3, align 4
; Linear reduction chain: each call folds the next element (first operand)
; into the result of the previous call (second operand).
%m1 = tail call float @llvm.maximumnum.f32(float %t1, float %t0)
%m2 = tail call float @llvm.maximumnum.f32(float %t2, float %m1)
%m3 = tail call float @llvm.maximumnum.f32(float %t3, float %m2)
ret float %m3
}
; Chained llvm.minimumnum.f32 reduction over four consecutive floats starting
; at %p: m3 = minimumnum(t3, minimumnum(t2, minimumnum(t1, t0))).
; The autogenerated assertions expect the GEP/load/call sequence to come out
; of the SLP vectorizer in the same scalar form as the input, i.e. no vector
; reduction is formed for this chain.
; NOTE(review): the function name says "v4f64", but every load, call and the
; return type here is float (f32) -- the name looks like a leftover; confirm
; before renaming, since the name also appears in the label assertion.
define float @reduction_v4f64_minimumnum(ptr %p) {
; CHECK-LABEL: define float @reduction_v4f64_minimumnum
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT: [[M1:%.*]] = tail call float @llvm.minimumnum.f32(float [[T1]], float [[T0]])
; CHECK-NEXT: [[M2:%.*]] = tail call float @llvm.minimumnum.f32(float [[T2]], float [[M1]])
; CHECK-NEXT: [[M3:%.*]] = tail call float @llvm.minimumnum.f32(float [[T3]], float [[M2]])
; CHECK-NEXT: ret float [[M3]]
;
; Addresses of elements 1..3; element 0 is read directly through %p.
%g1 = getelementptr inbounds float, ptr %p, i64 1
%g2 = getelementptr inbounds float, ptr %p, i64 2
%g3 = getelementptr inbounds float, ptr %p, i64 3
; Load the four adjacent float elements.
%t0 = load float, ptr %p, align 4
%t1 = load float, ptr %g1, align 4
%t2 = load float, ptr %g2, align 4
%t3 = load float, ptr %g3, align 4
; Linear reduction chain: each call folds the next element (first operand)
; into the result of the previous call (second operand).
%m1 = tail call float @llvm.minimumnum.f32(float %t1, float %t0)
%m2 = tail call float @llvm.minimumnum.f32(float %t2, float %m1)
%m3 = tail call float @llvm.minimumnum.f32(float %t3, float %m2)
ret float %m3
}