Add a new reduction recurrence kind for reductions with minimumnum/maximumnum. Such reductions can be vectorized without the nsz/nnan flags, the same as reductions with the maximum/minimum intrinsics. Note that a new reduction kind is needed to make sure partial reductions are also combined with minimumnum/maximumnum. Note that the final reduction to a scalar value is performed with vector.reduce.fmin/fmax. This should be fine, as the results of the partial reductions with maximumnum/minimumnum silence any sNaNs. In-loop reductions and reductions in SLP are not supported yet, as there is no reduction version of maximumnum/minimumnum yet and fmax may be incorrect. PR: https://github.com/llvm/llvm-project/pull/137335
63 lines
3.2 KiB
LLVM
63 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-macosx -S %s | FileCheck %s
|
|
|
|
; Scalar f32 intrinsics called by the two reduction test functions in this file.
declare float @llvm.maximumnum.f32(float, float)
|
|
declare float @llvm.minimumnum.f32(float, float)
|
|
|
|
; TODO: Need reduction version of maximumnum/minimumnum.
|
|
; Loads four consecutive floats starting at %p and folds them with a linear
; chain of three @llvm.maximumnum.f32 calls, returning the final value.
; The autogenerated assertions expect the same scalar loads and calls in the
; output of the slp-vectorizer run, i.e. no vector reduction is formed here.
define float @reduction_v4f32_maximumnum(ptr %p) {
|
|
; CHECK-LABEL: define float @reduction_v4f32_maximumnum
|
|
; CHECK-SAME: (ptr [[P:%.*]]) {
|
|
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
|
|
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
|
|
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
|
|
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
|
|
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
|
|
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
|
|
; CHECK-NEXT: [[M1:%.*]] = tail call float @llvm.maximumnum.f32(float [[T1]], float [[T0]])
|
|
; CHECK-NEXT: [[M2:%.*]] = tail call float @llvm.maximumnum.f32(float [[T2]], float [[M1]])
|
|
; CHECK-NEXT: [[M3:%.*]] = tail call float @llvm.maximumnum.f32(float [[T3]], float [[M2]])
|
|
; CHECK-NEXT: ret float [[M3]]
|
|
;
|
|
; Addresses of elements 1..3; element 0 is read directly through %p.
%g1 = getelementptr inbounds float, ptr %p, i64 1
|
|
%g2 = getelementptr inbounds float, ptr %p, i64 2
|
|
%g3 = getelementptr inbounds float, ptr %p, i64 3
|
|
%t0 = load float, ptr %p, align 4
|
|
%t1 = load float, ptr %g1, align 4
|
|
%t2 = load float, ptr %g2, align 4
|
|
%t3 = load float, ptr %g3, align 4
|
|
; Reduction chain: %m3 = maximumnum(%t3, maximumnum(%t2, maximumnum(%t1, %t0))).
%m1 = tail call float @llvm.maximumnum.f32(float %t1, float %t0)
|
|
%m2 = tail call float @llvm.maximumnum.f32(float %t2, float %m1)
|
|
%m3 = tail call float @llvm.maximumnum.f32(float %t3, float %m2)
|
|
ret float %m3
|
|
}
|
|
|
|
; Loads four consecutive floats starting at %p and folds them with a linear
; chain of three @llvm.minimumnum.f32 calls, returning the final value.
; The autogenerated assertions expect the same scalar loads and calls in the
; output of the slp-vectorizer run, i.e. no vector reduction is formed here.
; NOTE(review): the name says "v4f64", but every value in this function is
; float (f32) and the f32 intrinsic is used; the name is left unchanged because
; the label assertion matches it.
define float @reduction_v4f64_minimumnum(ptr %p) {
|
|
; CHECK-LABEL: define float @reduction_v4f64_minimumnum
|
|
; CHECK-SAME: (ptr [[P:%.*]]) {
|
|
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
|
|
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
|
|
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
|
|
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
|
|
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
|
|
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
|
|
; CHECK-NEXT: [[M1:%.*]] = tail call float @llvm.minimumnum.f32(float [[T1]], float [[T0]])
|
|
; CHECK-NEXT: [[M2:%.*]] = tail call float @llvm.minimumnum.f32(float [[T2]], float [[M1]])
|
|
; CHECK-NEXT: [[M3:%.*]] = tail call float @llvm.minimumnum.f32(float [[T3]], float [[M2]])
|
|
; CHECK-NEXT: ret float [[M3]]
|
|
;
|
|
; Addresses of elements 1..3; element 0 is read directly through %p.
%g1 = getelementptr inbounds float, ptr %p, i64 1
|
|
%g2 = getelementptr inbounds float, ptr %p, i64 2
|
|
%g3 = getelementptr inbounds float, ptr %p, i64 3
|
|
%t0 = load float, ptr %p, align 4
|
|
%t1 = load float, ptr %g1, align 4
|
|
%t2 = load float, ptr %g2, align 4
|
|
%t3 = load float, ptr %g3, align 4
|
|
; Reduction chain: %m3 = minimumnum(%t3, minimumnum(%t2, minimumnum(%t1, %t0))).
%m1 = tail call float @llvm.minimumnum.f32(float %t1, float %t0)
|
|
%m2 = tail call float @llvm.minimumnum.f32(float %t2, float %m1)
|
|
%m3 = tail call float @llvm.minimumnum.f32(float %t3, float %m2)
|
|
ret float %m3
|
|
}
|