
This restores commit c7fdd8c11e54585dc9d15d63de9742067e0506b9. Previously reverted in f010b1bef4dda2c7082cbb41dbabf1f149cce306. LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows: 1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics. 2. Model token values as untyped virtual registers in MIR. The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR. The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
650 lines
22 KiB
LLVM
650 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; REQUIRES: asserts
|
|
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
|
|
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
|
|
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
|
|
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
|
|
|
|
; Test FP transforms using instruction/node-level fast-math-flags.
|
|
; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
|
|
; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
|
|
|
|
declare float @llvm.fma.f32(float, float, float)
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
; X * Y + Z --> fma(X, Y, Z)
|
|
|
|
; contract bits in fmul is checked.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
|
|
; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
|
|
|
|
define float @fmul_fadd_contract1(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_contract1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmulsp 0, 1, 2
|
|
; FMF-NEXT: xsaddsp 1, 0, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_contract1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul float %x, %y
|
|
%add = fadd contract float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; contract bits in fadd is also checked.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
|
|
; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
|
|
|
|
define float @fmul_fadd_contract2(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_contract2:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmulsp 0, 1, 2
|
|
; FMF-NEXT: xsaddsp 1, 0, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_contract2:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul contract float %x, %y
|
|
%add = fadd float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
|
|
; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'
|
|
|
|
define float @fmul_fadd_contract3(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_contract3:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmaddasp 3, 1, 2
|
|
; FMF-NEXT: fmr 1, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_contract3:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul contract float %x, %y
|
|
%add = fadd contract float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; Reassociation does NOT imply that FMA contraction is allowed.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
|
|
; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
|
|
|
|
define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_reassoc1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmulsp 0, 1, 2
|
|
; FMF-NEXT: xsaddsp 1, 0, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_reassoc1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul float %x, %y
|
|
%add = fadd reassoc float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; This shouldn't change anything - the intermediate fmul result is now also flagged.
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
|
|
; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
|
|
|
|
define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_reassoc2:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmulsp 0, 1, 2
|
|
; FMF-NEXT: xsaddsp 1, 0, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_reassoc2:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul reassoc float %x, %y
|
|
%add = fadd reassoc float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; The fadd is now fully 'fast', but fmul is not yet.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
|
|
; FMFDEBUG-NOT: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
|
|
|
|
define float @fmul_fadd_fast1(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_fast1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmulsp 0, 1, 2
|
|
; FMF-NEXT: xsaddsp 1, 0, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_fast1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul float %x, %y
|
|
%add = fadd fast float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; This implies that contraction is allowed - the intermediate fmul result is now also flagged.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
|
|
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
|
|
|
|
define float @fmul_fadd_fast2(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fmul_fadd_fast2:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xsmaddasp 3, 1, 2
|
|
; FMF-NEXT: fmr 1, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fadd_fast2:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xsmaddasp 3, 1, 2
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul fast float %x, %y
|
|
%add = fadd fast float %mul, %z
|
|
ret float %add
|
|
}
|
|
|
|
; fma(X, 7.0, X * 42.0) --> X * 49.0
|
|
; This is the minimum FMF needed for this transform - the FMA allows reassociation.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
|
; FMFDEBUG: fmul reassoc {{t[0-9]+}},
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
|
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
|
|
|
|
define float @fmul_fma_reassoc1(float %x) {
|
|
; FMF-LABEL: fmul_fma_reassoc1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
|
|
; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fma_reassoc1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
|
|
; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul float %x, 42.0
|
|
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
|
|
ret float %fma
|
|
}
|
|
|
|
; This shouldn't change anything - the intermediate fmul result is now also flagged.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
|
; FMFDEBUG: fmul reassoc {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
|
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
|
|
|
|
define float @fmul_fma_reassoc2(float %x) {
|
|
; FMF-LABEL: fmul_fma_reassoc2:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
|
|
; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fma_reassoc2:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
|
|
; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul reassoc float %x, 42.0
|
|
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
|
|
ret float %fma
|
|
}
|
|
|
|
; The FMA is now fully 'fast'. This implies that reassociation is allowed.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
|
|
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
|
|
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
|
|
|
|
define float @fmul_fma_fast1(float %x) {
|
|
; FMF-LABEL: fmul_fma_fast1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
|
|
; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fma_fast1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
|
|
; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul float %x, 42.0
|
|
%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
|
|
ret float %fma
|
|
}
|
|
|
|
; This shouldn't change anything - the intermediate fmul result is now also flagged.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
|
|
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
|
|
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
|
|
|
|
define float @fmul_fma_fast2(float %x) {
|
|
; FMF-LABEL: fmul_fma_fast2:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
|
|
; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fmul_fma_fast2:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
|
|
; GLOBAL-NEXT: lfs 0, .LCPI10_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%mul = fmul fast float %x, 42.0
|
|
%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
|
|
ret float %fma
|
|
}
|
|
|
|
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
|
|
; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
|
|
; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
|
|
|
|
define float @sqrt_afn_ieee(float %x) #0 {
|
|
; FMF-LABEL: sqrt_afn_ieee:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI11_1@toc@ha
|
|
; FMF-NEXT: xsabsdp 0, 1
|
|
; FMF-NEXT: lfs 2, .LCPI11_1@toc@l(3)
|
|
; FMF-NEXT: fcmpu 0, 0, 2
|
|
; FMF-NEXT: xxlxor 0, 0, 0
|
|
; FMF-NEXT: blt 0, .LBB11_2
|
|
; FMF-NEXT: # %bb.1:
|
|
; FMF-NEXT: xsrsqrtesp 2, 1
|
|
; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
|
|
; FMF-NEXT: vspltisw 2, -3
|
|
; FMF-NEXT: lfs 0, .LCPI11_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 2
|
|
; FMF-NEXT: xsmulsp 0, 1, 0
|
|
; FMF-NEXT: xsmulsp 1, 1, 2
|
|
; FMF-NEXT: xvcvsxwdp 2, 34
|
|
; FMF-NEXT: xsaddsp 1, 1, 2
|
|
; FMF-NEXT: xsmulsp 0, 0, 1
|
|
; FMF-NEXT: .LBB11_2:
|
|
; FMF-NEXT: fmr 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_afn_ieee:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
|
|
; GLOBAL-NEXT: xsabsdp 0, 1
|
|
; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
|
|
; GLOBAL-NEXT: fcmpu 0, 0, 2
|
|
; GLOBAL-NEXT: xxlxor 0, 0, 0
|
|
; GLOBAL-NEXT: blt 0, .LBB11_2
|
|
; GLOBAL-NEXT: # %bb.1:
|
|
; GLOBAL-NEXT: xsrsqrtesp 0, 1
|
|
; GLOBAL-NEXT: vspltisw 2, -3
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
|
|
; GLOBAL-NEXT: xvcvsxwdp 2, 34
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
|
|
; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 0, 1, 0
|
|
; GLOBAL-NEXT: xsmulsp 0, 0, 2
|
|
; GLOBAL-NEXT: .LBB11_2:
|
|
; GLOBAL-NEXT: fmr 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
define float @sqrt_afn_ieee_inf(float %x) #0 {
|
|
; FMF-LABEL: sqrt_afn_ieee_inf:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xssqrtsp 1, 1
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_afn_ieee_inf:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xssqrtsp 1, 1
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call afn float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
|
|
; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
|
|
; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
|
|
|
|
define float @sqrt_afn_preserve_sign(float %x) #1 {
|
|
; FMF-LABEL: sqrt_afn_preserve_sign:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xxlxor 0, 0, 0
|
|
; FMF-NEXT: fcmpu 0, 1, 0
|
|
; FMF-NEXT: beq 0, .LBB13_2
|
|
; FMF-NEXT: # %bb.1:
|
|
; FMF-NEXT: xsrsqrtesp 0, 1
|
|
; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha
|
|
; FMF-NEXT: vspltisw 2, -3
|
|
; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: xsmulsp 2, 1, 2
|
|
; FMF-NEXT: xsmulsp 0, 1, 0
|
|
; FMF-NEXT: xvcvsxwdp 1, 34
|
|
; FMF-NEXT: xsaddsp 0, 0, 1
|
|
; FMF-NEXT: xsmulsp 0, 2, 0
|
|
; FMF-NEXT: .LBB13_2:
|
|
; FMF-NEXT: fmr 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xxlxor 0, 0, 0
|
|
; GLOBAL-NEXT: fcmpu 0, 1, 0
|
|
; GLOBAL-NEXT: beq 0, .LBB13_2
|
|
; GLOBAL-NEXT: # %bb.1:
|
|
; GLOBAL-NEXT: xsrsqrtesp 0, 1
|
|
; GLOBAL-NEXT: vspltisw 2, -3
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
|
|
; GLOBAL-NEXT: xvcvsxwdp 2, 34
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
|
|
; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 0, 1, 0
|
|
; GLOBAL-NEXT: xsmulsp 0, 0, 2
|
|
; GLOBAL-NEXT: .LBB13_2:
|
|
; GLOBAL-NEXT: fmr 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
|
|
; FMF-LABEL: sqrt_afn_preserve_sign_inf:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xssqrtsp 1, 1
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xssqrtsp 1, 1
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call afn float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
; The call is now fully 'fast'. This implies that approximation is allowed.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
|
|
; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
|
|
; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
|
|
|
|
define float @sqrt_fast_ieee(float %x) #0 {
|
|
; FMF-LABEL: sqrt_fast_ieee:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: addis 3, 2, .LCPI15_1@toc@ha
|
|
; FMF-NEXT: xsabsdp 0, 1
|
|
; FMF-NEXT: lfs 2, .LCPI15_1@toc@l(3)
|
|
; FMF-NEXT: fcmpu 0, 0, 2
|
|
; FMF-NEXT: xxlxor 0, 0, 0
|
|
; FMF-NEXT: blt 0, .LBB15_2
|
|
; FMF-NEXT: # %bb.1:
|
|
; FMF-NEXT: xsrsqrtesp 0, 1
|
|
; FMF-NEXT: vspltisw 2, -3
|
|
; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha
|
|
; FMF-NEXT: xvcvsxwdp 2, 34
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: xsmaddasp 2, 1, 0
|
|
; FMF-NEXT: lfs 0, .LCPI15_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 0, 1, 0
|
|
; FMF-NEXT: xsmulsp 0, 0, 2
|
|
; FMF-NEXT: .LBB15_2:
|
|
; FMF-NEXT: fmr 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_fast_ieee:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
|
|
; GLOBAL-NEXT: xsabsdp 0, 1
|
|
; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
|
|
; GLOBAL-NEXT: fcmpu 0, 0, 2
|
|
; GLOBAL-NEXT: xxlxor 0, 0, 0
|
|
; GLOBAL-NEXT: blt 0, .LBB15_2
|
|
; GLOBAL-NEXT: # %bb.1:
|
|
; GLOBAL-NEXT: xsrsqrtesp 0, 1
|
|
; GLOBAL-NEXT: vspltisw 2, -3
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
|
|
; GLOBAL-NEXT: xvcvsxwdp 2, 34
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
|
|
; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 0, 1, 0
|
|
; GLOBAL-NEXT: xsmulsp 0, 0, 2
|
|
; GLOBAL-NEXT: .LBB15_2:
|
|
; GLOBAL-NEXT: fmr 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
; The call is now fully 'fast'. This implies that approximation is allowed.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
|
|
; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
|
|
; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
|
|
|
|
define float @sqrt_fast_preserve_sign(float %x) #1 {
|
|
; FMF-LABEL: sqrt_fast_preserve_sign:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xxlxor 0, 0, 0
|
|
; FMF-NEXT: fcmpu 0, 1, 0
|
|
; FMF-NEXT: beq 0, .LBB16_2
|
|
; FMF-NEXT: # %bb.1:
|
|
; FMF-NEXT: xsrsqrtesp 0, 1
|
|
; FMF-NEXT: vspltisw 2, -3
|
|
; FMF-NEXT: addis 3, 2, .LCPI16_0@toc@ha
|
|
; FMF-NEXT: xvcvsxwdp 2, 34
|
|
; FMF-NEXT: xsmulsp 1, 1, 0
|
|
; FMF-NEXT: xsmaddasp 2, 1, 0
|
|
; FMF-NEXT: lfs 0, .LCPI16_0@toc@l(3)
|
|
; FMF-NEXT: xsmulsp 0, 1, 0
|
|
; FMF-NEXT: xsmulsp 0, 0, 2
|
|
; FMF-NEXT: .LBB16_2:
|
|
; FMF-NEXT: fmr 1, 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xxlxor 0, 0, 0
|
|
; GLOBAL-NEXT: fcmpu 0, 1, 0
|
|
; GLOBAL-NEXT: beq 0, .LBB16_2
|
|
; GLOBAL-NEXT: # %bb.1:
|
|
; GLOBAL-NEXT: xsrsqrtesp 0, 1
|
|
; GLOBAL-NEXT: vspltisw 2, -3
|
|
; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
|
|
; GLOBAL-NEXT: xvcvsxwdp 2, 34
|
|
; GLOBAL-NEXT: xsmulsp 1, 1, 0
|
|
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
|
|
; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3)
|
|
; GLOBAL-NEXT: xsmulsp 0, 1, 0
|
|
; GLOBAL-NEXT: xsmulsp 0, 0, 2
|
|
; GLOBAL-NEXT: .LBB16_2:
|
|
; GLOBAL-NEXT: fmr 1, 0
|
|
; GLOBAL-NEXT: blr
|
|
%rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
|
|
ret float %rt
|
|
}
|
|
|
|
; fcmp can have fast-math-flags.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
|
|
; FMFDEBUG: select_cc nnan {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
|
|
; GLOBALDEBUG: select_cc nnan {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
|
|
|
|
define double @fcmp_nnan(double %a, double %y, double %z) {
|
|
; FMF-LABEL: fcmp_nnan:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xxlxor 0, 0, 0
|
|
; FMF-NEXT: xscmpudp 0, 1, 0
|
|
; FMF-NEXT: blt 0, .LBB17_2
|
|
; FMF-NEXT: # %bb.1:
|
|
; FMF-NEXT: fmr 3, 2
|
|
; FMF-NEXT: .LBB17_2:
|
|
; FMF-NEXT: fmr 1, 3
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fcmp_nnan:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xxlxor 0, 0, 0
|
|
; GLOBAL-NEXT: xscmpudp 0, 1, 0
|
|
; GLOBAL-NEXT: blt 0, .LBB17_2
|
|
; GLOBAL-NEXT: # %bb.1:
|
|
; GLOBAL-NEXT: fmr 3, 2
|
|
; GLOBAL-NEXT: .LBB17_2:
|
|
; GLOBAL-NEXT: fmr 1, 3
|
|
; GLOBAL-NEXT: blr
|
|
%cmp = fcmp nnan ult double %a, 0.0
|
|
%z.y = select i1 %cmp, double %z, double %y
|
|
ret double %z.y
|
|
}
|
|
|
|
; FP library calls can have fast-math-flags.
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
|
|
; FMFDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
|
|
; FMFDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
|
|
; FMFDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
|
|
; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
|
|
; GLOBALDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
|
|
; GLOBALDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
|
|
|
|
declare double @log2(double)
|
|
define double @log2_approx(double %x) nounwind {
|
|
; FMF-LABEL: log2_approx:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: mflr 0
|
|
; FMF-NEXT: stdu 1, -32(1)
|
|
; FMF-NEXT: std 0, 48(1)
|
|
; FMF-NEXT: bl log2
|
|
; FMF-NEXT: nop
|
|
; FMF-NEXT: addi 1, 1, 32
|
|
; FMF-NEXT: ld 0, 16(1)
|
|
; FMF-NEXT: mtlr 0
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: log2_approx:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: mflr 0
|
|
; GLOBAL-NEXT: stdu 1, -32(1)
|
|
; GLOBAL-NEXT: std 0, 48(1)
|
|
; GLOBAL-NEXT: bl log2
|
|
; GLOBAL-NEXT: nop
|
|
; GLOBAL-NEXT: addi 1, 1, 32
|
|
; GLOBAL-NEXT: ld 0, 16(1)
|
|
; GLOBAL-NEXT: mtlr 0
|
|
; GLOBAL-NEXT: blr
|
|
%r = call afn double @log2(double %x)
|
|
ret double %r
|
|
}
|
|
|
|
; -(X - Y) --> (Y - X)
|
|
|
|
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
|
|
; FMFDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
|
|
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
|
|
|
|
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
|
|
; GLOBALDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
|
|
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
|
|
|
|
define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
|
|
; FMF-LABEL: fneg_fsub_nozeros_1:
|
|
; FMF: # %bb.0:
|
|
; FMF-NEXT: xssubsp 1, 2, 1
|
|
; FMF-NEXT: blr
|
|
;
|
|
; GLOBAL-LABEL: fneg_fsub_nozeros_1:
|
|
; GLOBAL: # %bb.0:
|
|
; GLOBAL-NEXT: xssubsp 1, 2, 1
|
|
; GLOBAL-NEXT: blr
|
|
%neg = fsub float %x, %y
|
|
%add = fsub nsz float 0.0, %neg
|
|
ret float %add
|
|
}
|
|
|
|
attributes #0 = { "denormal-fp-math"="ieee,ieee" }
|
|
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; FMFDEBUG: {{.*}}
|
|
; GLOBALDEBUG: {{.*}}
|