Simon Pilgrim 0ea8d275cc [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ truncating conversions with generic IR instead.

It turns out that the behaviour of these intrinsics is different enough from generic IR that this will cause problems: INF/NAN/out-of-range values are guaranteed to result in a 0x80000000 value, which plays havoc with constant folding, as folding converts them to either zero or UNDEF. This is also an issue with the scalar implementations (which were already generic IR and what I was trying to match).

This patch changes both scalar and packed versions back to using x86-specific builtins.

It also deals with the other scalar conversion cases that are runtime rounding mode dependent and can have similar issues with constant folding.

A companion clang patch is at D22105

Differential Revision: https://reviews.llvm.org/D22106

llvm-svn: 275981
2016-07-19 15:07:43 +00:00

395 lines
14 KiB
LLVM

; RUN: opt < %s -constprop -S | FileCheck %s
; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
; External declarations of the math-library-style functions called by @T:
; first the double-precision variants, then the matching float variants
; (same names with an 'f' suffix).
declare double @acos(double)
declare double @asin(double)
declare double @atan(double)
declare double @atan2(double, double)
declare double @ceil(double)
declare double @cos(double)
declare double @cosh(double)
declare double @exp(double)
declare double @exp2(double)
declare double @fabs(double)
declare double @floor(double)
declare double @fmod(double, double)
declare double @log(double)
declare double @log10(double)
declare double @pow(double, double)
declare double @sin(double)
declare double @sinh(double)
declare double @sqrt(double)
declare double @tan(double)
declare double @tanh(double)
declare float @acosf(float)
declare float @asinf(float)
declare float @atanf(float)
declare float @atan2f(float, float)
declare float @ceilf(float)
declare float @cosf(float)
declare float @coshf(float)
declare float @expf(float)
declare float @exp2f(float)
declare float @fabsf(float)
declare float @floorf(float)
declare float @fmodf(float, float)
declare float @logf(float)
declare float @log10f(float)
declare float @powf(float, float)
declare float @sinf(float)
declare float @sinhf(float)
declare float @sqrtf(float)
declare float @tanf(float)
declare float @tanhf(float)
; Constant folding of math-library-style calls whose arguments are all
; constants. Under the first RUN line no call is expected to remain before
; the ret. Under the second RUN line (-disable-simplify-libcalls) each call
; carrying a per-call directive below is expected to survive.
define double @T() {
; CHECK-LABEL: @T(
; FNOBUILTIN-LABEL: @T(
; CHECK-NOT: call
; CHECK: ret
; These four results feed the fadd chain that produces the return value.
%A = call double @cos(double 0.000000e+00)
%B = call double @sin(double 0.000000e+00)
%a = fadd double %A, %B
%C = call double @tan(double 0.000000e+00)
%b = fadd double %a, %C
%D = call double @sqrt(double 4.000000e+00)
%c = fadd double %b, %D
; Scratch stack slots: each result below is stored into one of these
; (presumably so that every call has a user -- confirm if this matters).
%slot = alloca double
%slotf = alloca float
; Double-precision variants.
; FNOBUILTIN: call
%1 = call double @acos(double 1.000000e+00)
store double %1, double* %slot
; FNOBUILTIN: call
%2 = call double @asin(double 1.000000e+00)
store double %2, double* %slot
; FNOBUILTIN: call
%3 = call double @atan(double 3.000000e+00)
store double %3, double* %slot
; FNOBUILTIN: call
%4 = call double @atan2(double 3.000000e+00, double 4.000000e+00)
store double %4, double* %slot
; FNOBUILTIN: call
%5 = call double @ceil(double 3.000000e+00)
store double %5, double* %slot
; FNOBUILTIN: call
%6 = call double @cosh(double 3.000000e+00)
store double %6, double* %slot
; FNOBUILTIN: call
%7 = call double @exp(double 3.000000e+00)
store double %7, double* %slot
; FNOBUILTIN: call
%8 = call double @exp2(double 3.000000e+00)
store double %8, double* %slot
; FNOBUILTIN: call
%9 = call double @fabs(double 3.000000e+00)
store double %9, double* %slot
; FNOBUILTIN: call
%10 = call double @floor(double 3.000000e+00)
store double %10, double* %slot
; FNOBUILTIN: call
%11 = call double @fmod(double 3.000000e+00, double 4.000000e+00)
store double %11, double* %slot
; FNOBUILTIN: call
%12 = call double @log(double 3.000000e+00)
store double %12, double* %slot
; FNOBUILTIN: call
%13 = call double @log10(double 3.000000e+00)
store double %13, double* %slot
; FNOBUILTIN: call
%14 = call double @pow(double 3.000000e+00, double 4.000000e+00)
store double %14, double* %slot
; FNOBUILTIN: call
%15 = call double @sinh(double 3.000000e+00)
store double %15, double* %slot
; FNOBUILTIN: call
%16 = call double @tanh(double 3.000000e+00)
store double %16, double* %slot
; Single-precision variants.
; FNOBUILTIN: call
%17 = call float @acosf(float 1.000000e+00)
store float %17, float* %slotf
; FNOBUILTIN: call
%18 = call float @asinf(float 1.000000e+00)
store float %18, float* %slotf
; FNOBUILTIN: call
%19 = call float @atanf(float 3.000000e+00)
store float %19, float* %slotf
; FNOBUILTIN: call
%20 = call float @atan2f(float 3.000000e+00, float 4.000000e+00)
store float %20, float* %slotf
; FNOBUILTIN: call
%21 = call float @ceilf(float 3.000000e+00)
store float %21, float* %slotf
; FNOBUILTIN: call
%22 = call float @cosf(float 3.000000e+00)
store float %22, float* %slotf
; FNOBUILTIN: call
%23 = call float @coshf(float 3.000000e+00)
store float %23, float* %slotf
; FNOBUILTIN: call
%24 = call float @expf(float 3.000000e+00)
store float %24, float* %slotf
; FNOBUILTIN: call
%25 = call float @exp2f(float 3.000000e+00)
store float %25, float* %slotf
; FNOBUILTIN: call
%26 = call float @fabsf(float 3.000000e+00)
store float %26, float* %slotf
; FNOBUILTIN: call
%27 = call float @floorf(float 3.000000e+00)
store float %27, float* %slotf
; FNOBUILTIN: call
%28 = call float @fmodf(float 3.000000e+00, float 4.000000e+00)
store float %28, float* %slotf
; FNOBUILTIN: call
%29 = call float @logf(float 3.000000e+00)
store float %29, float* %slotf
; FNOBUILTIN: call
%30 = call float @log10f(float 3.000000e+00)
store float %30, float* %slotf
; FNOBUILTIN: call
%31 = call float @powf(float 3.000000e+00, float 4.000000e+00)
store float %31, float* %slotf
; FNOBUILTIN: call
%32 = call float @sinf(float 3.000000e+00)
store float %32, float* %slotf
; FNOBUILTIN: call
%33 = call float @sinhf(float 3.000000e+00)
store float %33, float* %slotf
; FNOBUILTIN: call
%34 = call float @sqrtf(float 3.000000e+00)
store float %34, float* %slotf
; FNOBUILTIN: call
%35 = call float @tanf(float 3.000000e+00)
store float %35, float* %slotf
; FNOBUILTIN: call
%36 = call float @tanhf(float 3.000000e+00)
store float %36, float* %slotf
; FNOBUILTIN: ret
; PR9315
; exp2 with a constant argument; its result joins the returned fadd chain.
%E = call double @exp2(double 4.0)
%d = fadd double %c, %E
ret double %d
}
; Scalar cvtss2si/cvtsd2si (32- and 64-bit results) on inputs that are exactly
; representable as integers: 3.0 and 7.0. All four calls are expected to fold,
; so both sums equal 10 and the function reduces to 'ret i1 true'.
define i1 @test_sse_cvts_exact() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_exact(
; CHECK-NOT: call
; CHECK: ret i1 true
entry:
; Only element 0 of each vector operand is defined; the rest are undef.
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
%i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 7.0, double undef>) nounwind
%i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 7.0, double undef>) nounwind
; 3 + 7 == 10 for both the i32 pair and the i64 pair.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%cmp02 = icmp eq i32 %sum02, 10
%cmp13 = icmp eq i64 %sum13, 10
%b = and i1 %cmp02, %cmp13
ret i1 %b
}
; Inexact values should not fold as they are dependent on rounding mode
; The input 1.75 has a fractional part, so all four conversion calls are
; expected to remain in the output.
define i1 @test_sse_cvts_inexact() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_inexact(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
%i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
%i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
; The arithmetic below consumes all four results; the directives above only
; look for the surviving calls, not for a particular return value.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%cmp02 = icmp eq i32 %sum02, 4
%cmp13 = icmp eq i64 %sum13, 4
%b = and i1 %cmp02, %cmp13
ret i1 %b
}
; FLT_MAX/DBL_MAX should not fold
; These magnitudes do not fit in i32 or i64, so all four conversion calls are
; expected to remain in the output.
define i1 @test_sse_cvts_max() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_max(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2139095039 == 0x7F7FFFFF, the bit pattern of FLT_MAX.
%fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9218868437227405311 == 0x7FEFFFFFFFFFFFFF, the bit pattern of DBL_MAX.
%dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; INF should not fold
; All four conversion calls on +infinity are expected to remain in the output.
define i1 @test_sse_cvts_inf() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_inf(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2139095040 == 0x7F800000, the bit pattern of float +infinity.
%fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9218868437227405312 == 0x7FF0000000000000, the bit pattern of double +infinity.
%dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; NAN should not fold
; All four conversion calls on a NaN input are expected to remain in the output.
define i1 @test_sse_cvts_nan() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_nan(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2143289344 == 0x7FC00000, a float quiet-NaN bit pattern.
%fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9221120237041090560 == 0x7FF8000000000000, a double quiet-NaN bit pattern.
%dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; Truncating scalar conversions (cvttss2si/cvttsd2si, 32- and 64-bit results)
; on the exactly representable inputs 3.0 and 7.0. All four calls are expected
; to fold, so both sums equal 10 and the function reduces to 'ret i1 true'.
define i1 @test_sse_cvtts_exact() nounwind readnone {
; CHECK-LABEL: @test_sse_cvtts_exact(
; CHECK-NOT: call
; CHECK: ret i1 true
entry:
; Only element 0 of each vector operand is defined; the rest are undef.
%i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
%i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 7.0, double undef>) nounwind
%i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 7.0, double undef>) nounwind
; 3 + 7 == 10 for both the i32 pair and the i64 pair.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%cmp02 = icmp eq i32 %sum02, 10
%cmp13 = icmp eq i64 %sum13, 10
%b = and i1 %cmp02, %cmp13
ret i1 %b
}
; Truncating conversions of the inexact input 1.75. Unlike the non-truncating
; variants tested in @test_sse_cvts_inexact, these are expected to fold: each
; call yields 1, both sums equal 2, and the function reduces to 'ret i1 true'.
define i1 @test_sse_cvtts_inexact() nounwind readnone {
; CHECK-LABEL: @test_sse_cvtts_inexact(
; CHECK-NOT: call
; CHECK: ret i1 true
entry:
%i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
%i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
%i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
; trunc(1.75) == 1, so 1 + 1 == 2 for both the i32 pair and the i64 pair.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%cmp02 = icmp eq i32 %sum02, 2
%cmp13 = icmp eq i64 %sum13, 2
%b = and i1 %cmp02, %cmp13
ret i1 %b
}
; FLT_MAX/DBL_MAX should not fold
; These magnitudes do not fit in i32 or i64, so all four truncating conversion
; calls are expected to remain in the output.
define i1 @test_sse_cvtts_max() nounwind readnone {
; CHECK-LABEL: @test_sse_cvtts_max(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2139095039 == 0x7F7FFFFF, the bit pattern of FLT_MAX.
%fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9218868437227405311 == 0x7FEFFFFFFFFFFFFF, the bit pattern of DBL_MAX.
%dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; INF should not fold
; All four truncating conversion calls on +infinity are expected to remain in
; the output.
define i1 @test_sse_cvtts_inf() nounwind readnone {
; CHECK-LABEL: @test_sse_cvtts_inf(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2139095040 == 0x7F800000, the bit pattern of float +infinity.
%fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9218868437227405312 == 0x7FF0000000000000, the bit pattern of double +infinity.
%dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; NAN should not fold
; All four truncating conversion calls on a NaN input are expected to remain
; in the output.
define i1 @test_sse_cvtts_nan() nounwind readnone {
; CHECK-LABEL: @test_sse_cvtts_nan(
; CHECK: call
; CHECK: call
; CHECK: call
; CHECK: call
entry:
; 2143289344 == 0x7FC00000, a float quiet-NaN bit pattern.
%fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
; 9221120237041090560 == 0x7FF8000000000000, a double quiet-NaN bit pattern.
%dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
%i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
%i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
%i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
%i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
; Combine all four results into the return value so each call has a user.
%sum02 = add i32 %i0, %i2
%sum13 = add i64 %i1, %i3
%sum02.sext = sext i32 %sum02 to i64
%b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
; Declarations of the x86 SSE/SSE2 scalar float-to-integer conversion
; intrinsics used by the test_sse_cvt* functions in this file. Each takes a
; whole vector operand and returns an i32 or i64 (the '64' suffix).
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
; The llvm.pow.f64 intrinsic applied to the constants 1.5 and 3.0: the call is
; expected to be folded away, leaving no call before the ret.
define double @test_intrinsic_pow() nounwind uwtable ssp {
entry:
; CHECK-LABEL: @test_intrinsic_pow(
; CHECK-NOT: call
; CHECK: ret
%0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
ret double %0
}
; Declaration of the generic pow intrinsic used by @test_intrinsic_pow.
declare double @llvm.pow.f64(double, double) nounwind readonly