llvm-project/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
Roman Lebedev 0aef747b84
[NFC][X86][Codegen] Megacommit: mass-regenerate all check lines that were already autogenerated
The motivation is that the update script has at least two deviations
(emitting `<...>@GOT`/`<...>@PLT` symbols, and not hiding pointer
arithmetic) from what pretty much all the check lines were generated
with, and most of the tests have not been regenerated since, so each
time an out-of-date test is updated to see the effect of a code
change, there is a lot of noise. Instead of having to deal with that
each time, let's just deal with everything at once.

This has been done via:
```
cd llvm-project/llvm/test/CodeGen/X86
grep -rl "; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py" | xargs -L1 <...>/llvm-project/llvm/utils/update_llc_test_checks.py --llc-binary <...>/llvm-project/build/bin/llc
```
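That is, every test that already carries the autogeneration note is
re-run through update_llc_test_checks.py against a freshly built llc.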

Not all tests were regenerated, however.
2021-06-11 23:57:02 +03:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X64
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
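; The expected lowering below uses SSE4.1 roundss/roundsd, whose imm8 selects
; the behavior: bits 1:0 pick the rounding mode (00 = nearest-even, 01 =
; toward -inf, 10 = toward +inf, 11 = toward zero), bit 2 defers to MXCSR.RC
; instead, and bit 3 suppresses the precision (inexact) exception. On 32-bit
; targets the result is spilled and reloaded with flds/fldl because the i386
; ABI returns floating-point values in st(0); the `wait` orders any pending
; FP exceptions, as required under strictfp.

; ceil: imm $10 = round toward +inf with inexact suppressed.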
define float @fceil32(float %f) #0 {
; SSE41-X86-LABEL: fceil32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceil32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceil32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceil32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.ceil.f32(
float %f, metadata !"fpexcept.strict") #0
ret float %res
}
define double @fceilf64(double %f) #0 {
; SSE41-X86-LABEL: fceilf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceilf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceilf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceilf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.ceil.f64(
double %f, metadata !"fpexcept.strict") #0
ret double %res
}
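; floor: imm $9 = round toward -inf with inexact suppressed.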
define float @ffloor32(float %f) #0 {
; SSE41-X86-LABEL: ffloor32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloor32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloor32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloor32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.floor.f32(
float %f, metadata !"fpexcept.strict") #0
ret float %res
}
define double @ffloorf64(double %f) #0 {
; SSE41-X86-LABEL: ffloorf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloorf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloorf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloorf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.floor.f64(
double %f, metadata !"fpexcept.strict") #0
ret double %res
}
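; trunc: imm $11 = round toward zero with inexact suppressed.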
define float @ftrunc32(float %f) #0 {
; SSE41-X86-LABEL: ftrunc32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftrunc32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftrunc32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftrunc32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.trunc.f32(
float %f, metadata !"fpexcept.strict") #0
ret float %res
}
define double @ftruncf64(double %f) #0 {
; SSE41-X86-LABEL: ftruncf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftruncf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftruncf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftruncf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.trunc.f64(
double %f, metadata !"fpexcept.strict") #0
ret double %res
}
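; rint: imm $4 = round using the current MXCSR mode; bit 3 is clear, so the
; inexact exception may be raised, matching rint semantics.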
define float @frint32(float %f) #0 {
; SSE41-X86-LABEL: frint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.rint.f32(
float %f,
metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %res
}
define double @frintf64(double %f) #0 {
; SSE41-X86-LABEL: frintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.rint.f64(
double %f,
metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret double %res
}
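; nearbyint: imm $12 = round using the current MXCSR mode with inexact
; suppressed, matching nearbyint semantics.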
define float @fnearbyint32(float %f) #0 {
; SSE41-X86-LABEL: fnearbyint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %f,
metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %res
}
define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-LABEL: fnearbyintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f,
metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret double %res
}
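; round (ties away from zero) has no roundss/roundsd encoding, so the strict
; variant lowers to a libcall to roundf/round even with SSE4.1 or AVX.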
define float @fround32(float %f) #0 {
; SSE41-X86-LABEL: fround32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: calll roundf
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fround32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: pushq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 16
; SSE41-X64-NEXT: callq roundf@PLT
; SSE41-X64-NEXT: popq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 8
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fround32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: calll roundf
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fround32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: pushq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 16
; AVX-X64-NEXT: callq roundf@PLT
; AVX-X64-NEXT: popq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 8
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.round.f32(
float %f, metadata !"fpexcept.strict") #0
ret float %res
}
define double @froundf64(double %f) #0 {
; SSE41-X86-LABEL: froundf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 12
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: calll round
; SSE41-X86-NEXT: addl $8, %esp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: pushq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 16
; SSE41-X64-NEXT: callq round@PLT
; SSE41-X64-NEXT: popq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 8
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: .cfi_def_cfa_offset 12
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: calll round
; AVX-X86-NEXT: addl $8, %esp
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: pushq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 16
; AVX-X64-NEXT: callq round@PLT
; AVX-X64-NEXT: popq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 8
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.round.f64(
double %f, metadata !"fpexcept.strict") #0
ret double %res
}
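; roundeven: imm $8 = round to nearest-even with inexact suppressed.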
define float @froundeven32(float %f) #0 {
; SSE41-X86-LABEL: froundeven32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $8, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundeven32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $8, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundeven32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundeven32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call float @llvm.experimental.constrained.roundeven.f32(
float %f, metadata !"fpexcept.strict") #0
ret float %res
}
define double @froundevenf64(double %f) #0 {
; SSE41-X86-LABEL: froundevenf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $8, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundevenf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $8, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundevenf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundevenf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
%res = call double @llvm.experimental.constrained.roundeven.f64(
double %f, metadata !"fpexcept.strict") #0
ret double %res
}
attributes #0 = { strictfp }