
We need to ensure that fpexcept.strict nodes are not optimized away even if the result is unused. To do that, we need to chain them into the block's terminator nodes, like already done for PendingExcepts. This patch adds two new lists of pending chains, PendingConstrainedFP and PendingConstrainedFPStrict to hold constrained FP intrinsic nodes without and with fpexcept.strict markers. This allows not only to solve the above problem, but also to relax chains a bit further by no longer flushing all FP nodes before a store or other memory access. (They are still flushed before nodes with other side effects.) Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D72341
60 lines
3.5 KiB
LLVM
60 lines
3.5 KiB
LLVM
; RUN: llc -O3 -mtriple=x86_64-pc-linux -stop-after=finalize-isel < %s | FileCheck %s
|
|
|
|
define <1 x float> @constrained_vector_fadd_v1f32() #0 {
|
|
; CHECK-LABEL: name: constrained_vector_fadd_v1f32
|
|
; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
|
|
; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
|
|
; CHECK: $xmm0 = COPY [[ADDSSrm]]
|
|
; CHECK: RET 0, $xmm0
|
|
entry:
|
|
%add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float> <float 0x7FF0000000000000>, <1 x float> <float 1.0>, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
|
|
ret <1 x float> %add
|
|
}
|
|
|
|
define <3 x float> @constrained_vector_fadd_v3f32() #0 {
|
|
; CHECK-LABEL: name: constrained_vector_fadd_v3f32
|
|
; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
|
|
; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
|
|
; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr
|
|
; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
|
|
; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool)
|
|
; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[ADDSSrm1]]
|
|
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[ADDSSrm]]
|
|
; CHECK: [[UNPCKLPSrr:%[0-9]+]]:vr128 = UNPCKLPSrr [[COPY1]], killed [[COPY]]
|
|
; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[ADDSSrr]]
|
|
; CHECK: [[UNPCKLPDrr:%[0-9]+]]:vr128 = UNPCKLPDrr [[UNPCKLPSrr]], killed [[COPY2]]
|
|
; CHECK: $xmm0 = COPY [[UNPCKLPDrr]]
|
|
; CHECK: RET 0, $xmm0
|
|
entry:
|
|
%add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
|
|
<3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
|
|
float 0xFFFFFFFFE0000000>,
|
|
<3 x float> <float 2.0, float 1.0, float 0.0>,
|
|
metadata !"round.dynamic",
|
|
metadata !"fpexcept.strict") #0
|
|
ret <3 x float> %add
|
|
}
|
|
|
|
define <4 x double> @constrained_vector_fadd_v4f64() #0 {
|
|
; CHECK-LABEL: name: constrained_vector_fadd_v4f64
|
|
; CHECK: [[MOVAPDrm:%[0-9]+]]:vr128 = MOVAPDrm $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool)
|
|
; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
|
|
; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool)
|
|
; CHECK: $xmm0 = COPY [[ADDPDrm1]]
|
|
; CHECK: $xmm1 = COPY [[ADDPDrm]]
|
|
; CHECK: RET 0, $xmm0, $xmm1
|
|
entry:
|
|
%add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
|
|
<4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
|
|
double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
|
|
<4 x double> <double 1.000000e+00, double 1.000000e-01,
|
|
double 2.000000e+00, double 2.000000e-01>,
|
|
metadata !"round.dynamic",
|
|
metadata !"fpexcept.strict") #0
|
|
ret <4 x double> %add
|
|
}
|
|
|
|
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
|
|
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
|
|
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
|