
It is expected that the sequence `return X > 0.0 ? X : -X`, compiled with -Ofast, produces the fabs intrinsic. However, at this point, LLVM is unable to do so. The above sequence goes through the following transformations during the pass pipeline: 1) The SROA pass generates the phi node. Here, it does not infer the fast-math flags on the phi node, unlike what the clang frontend typically does. 2) The phi node eventually gets translated into a select instruction. Because the no-signed-zeros (nsz) fast-math flag is missing on the select instruction, the InstCombine pass fails to fold the sequence into the fabs intrinsic. This patch, as a part of SROA, tries to propagate the nsz fast-math flag onto the phi node using the function attribute, enabling this folding. Closes #51601 Co-authored-by: Sushant Gokhale <sgokhale@nvidia.com>
30 lines
1.2 KiB
LLVM
30 lines
1.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
|
|
|
|
; The following test must generate the fabs intrinsic. It goes through the following stages:
|
|
; 1. SROA propagates the nsz function attribute on the phi node.
|
|
; 2. SimplifyCFG pass converts phi node to select.
|
|
; 3. InstCombine converts select with nsz flag into fabs intrinsic.
|
|
|
|
; Test input: returns `%0 < 0.0 ? -%0 : %0`, written the way an unoptimized
; frontend would emit it -- the result lives in a stack slot (%x) that is
; conditionally overwritten and then reloaded, rather than in a select or phi.
;
; The function carries the "no-signed-zeros-fp-math"="true" attribute. The
; fcmp and fneg carry `nnan nsz`; the stores and the final load carry no
; fast-math flags. The second parameter (%1) is never referenced in the body.
;
; Expected output after the O1 pipeline (see the assertions below): a single
; `tail call nnan nsz double @llvm.fabs.f64` on the first argument, followed
; directly by the return.
define double @fabs_fcmp_olt_nsz_func_attr(double %0, double %1) "no-signed-zeros-fp-math"="true" {
|
|
; CHECK-LABEL: define double @fabs_fcmp_olt_nsz_func_attr(
|
|
; CHECK-SAME: double [[TMP0:%.*]], double [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[X_0:%.*]] = tail call nnan nsz double @llvm.fabs.f64(double [[TMP0]])
|
|
; CHECK-NEXT: ret double [[X_0]]
|
|
entry:
|
|
; %x is the stack slot that holds the value to be returned.
%x = alloca double
|
|
; Default result: the argument unchanged.
store double %0, ptr %x
|
|
; Ordered less-than against +0.0. When it is true, control goes to %if.then;
; otherwise it skips straight to %return with the slot untouched.
%cmp = fcmp nnan nsz olt double %0, 0.000000e+00
|
|
br i1 %cmp, label %if.then, label %return
|
|
|
|
if.then: ; preds = %entry
|
|
; Negative-input path: replace the slot contents with the negated argument.
%fneg = fneg nnan nsz double %0
|
|
store double %fneg, ptr %x
|
|
br label %return
|
|
|
|
return: ; preds = %entry, %if.then
|
|
; Merge point: reload whichever value was stored last. This load has no
; fast-math flags of its own; per the description at the top of this file, the
; nsz needed for the fabs fold is expected to come from the function attribute
; via SROA.
%ret = load double, ptr %x
|
|
ret double %ret
|
|
}
|