Philip Reames 2c7786e94a
Prefer use of 0.0 over -0.0 for fadd reductions w/nsz (in IR) (#106770)
This is a follow-up to 924907bc6, and is mostly motivated by consistency,
but it does include one additional optimization. In general, we prefer 0.0
over -0.0 as the identity value for an fadd. We already use that value in
several places, but not in others. So, let's be consistent and use the
same identity (when nsz allows) everywhere.

This creates a bunch of test churn, but due to 924907bc6, most of that
churn doesn't actually indicate a change in codegen. The exception is
that this change enables the use of 0.0 for nsz, but *not* reassoc, fadd
reductions. Said differently, it allows the neutral value of an
ordered fadd reduction to be 0.0.
2024-09-03 09:16:37 -07:00
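
For context, a minimal sketch of the identity-value distinction involved
(the %v operand is a hypothetical input, not taken from this patch):
-0.0 is the exact additive identity, since x + -0.0 == x for every x,
including x == -0.0, whereas -0.0 + 0.0 rounds to +0.0. Under nsz the
sign of a zero result doesn't matter, so 0.0 becomes a legal neutral
value even for an ordered (non-reassoc) reduction:

; Exact identity; required when signed zeros are significant.
%strict  = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %v)
; With nsz (and no reassoc) the reduction is still evaluated in order,
; but 0.0 is now an acceptable neutral value.
%relaxed = call nsz double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %v)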

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-library=MASSV -force-vector-interleave=1 \
; RUN: -vectorizer-maximize-bandwidth -passes='default<O2>,inject-tli-mappings,loop-vectorize' \
; RUN: -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s

define dso_local double @test(ptr %Arr) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[ARR:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <2 x double> @__sind2(<2 x double> [[TMP1]])
; CHECK-NEXT:    [[TMP3]] = fadd fast <2 x double> [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <2 x double> [ [[TMP3]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[DOTLCSSA]])
; CHECK-NEXT:    ret double [[TMP5]]
;
entry:
  br label %for.cond

for.cond:
  %Sum.0 = phi double [ 0.000000e+00, %entry ], [ %add, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 128
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds float, ptr %Arr, i64 %idxprom
  %0 = load float, ptr %arrayidx, align 4
  %conv = fpext float %0 to double
  %1 = call fast double @llvm.sin.f64(double %conv)
  %add = fadd fast double %Sum.0, %1
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret double %Sum.0
}

declare double @llvm.sin.f64(double)