
Remove `UnsafeFPMath` from the NVPTX backend. It blocks some Clang-related bug fixes, and the ultimate goal is to remove the `resetTargetOptions` method from `TargetMachine` (see the FIXME in `resetTargetOptions`). See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast and https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
317 lines
10 KiB
LLVM
317 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_60 | FileCheck %s

; Verifies the PTX that llc emits for the `frem` instruction on half, float
; and double operands, with and without fast-math flags (afn, ninf, arcp),
; when compiling for sm_60.

target triple = "nvptx64-unknown-cuda"
|
|
|
|
; frem on half with no fast-math flags. The expected PTX widens both operands
; to f32, computes a - trunc(a / b) * b using div.rn, cvt.rzi, neg and fma,
; then selects the dividend unchanged when the divisor is infinite
; (testp.infinite + selp) before narrowing the result back to f16.
define half @frem_f16(half %a, half %b) {
; CHECK-LABEL: frem_f16(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_param_1];
; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
; CHECK-NEXT: testp.infinite.f32 %p1, %r1;
; CHECK-NEXT: selp.f32 %r7, %r2, %r6, %p1;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r7;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
; CHECK-NEXT: ret;
  %r = frem half %a, %b
  ret half %r
}
|
|
|
|
; frem on half with the `afn ninf` flags. The expected PTX widens to f32,
; uses div.approx.f32 for the quotient, and contains no infinite-divisor
; select (no testp/selp) before narrowing the fma result back to f16.
define half @frem_f16_fast(half %a, half %b) {
; CHECK-LABEL: frem_f16_fast(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_fast_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_fast_param_1];
; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
; CHECK-NEXT: ret;
  %r = frem afn ninf half %a, %b
  ret half %r
}
|
|
|
|
; frem on float with no fast-math flags. The expected PTX computes
; a - trunc(a / b) * b using div.rn, cvt.rzi, neg and fma, then selects the
; dividend unchanged when the divisor is infinite (testp.infinite + selp).
define float @frem_f32(float %a, float %b) {
; CHECK-LABEL: frem_f32(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_param_1];
; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: testp.infinite.f32 %p1, %r2;
; CHECK-NEXT: selp.f32 %r7, %r1, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %r = frem float %a, %b
  ret float %r
}
|
|
|
|
; frem on float with the `afn ninf` flags. The expected PTX uses
; div.approx.f32 for the quotient and contains no infinite-divisor select.
define float @frem_f32_fast(float %a, float %b) {
; CHECK-LABEL: frem_f32_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_fast_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_fast_param_1];
; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %r = frem afn ninf float %a, %b
  ret float %r
}
|
|
|
|
; frem on double with no fast-math flags. The expected PTX computes
; a - trunc(a / b) * b using div.rn.f64, cvt.rzi, neg and fma, then selects
; the dividend unchanged when the divisor is infinite (testp.infinite + selp).
define double @frem_f64(double %a, double %b) {
; CHECK-LABEL: frem_f64(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_param_1];
; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
; CHECK-NEXT: neg.f64 %rd5, %rd4;
; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; CHECK-NEXT: testp.infinite.f64 %p1, %rd2;
; CHECK-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT: ret;
  %r = frem double %a, %b
  ret double %r
}
|
|
|
|
; frem on double with the `afn ninf` flags. The expected PTX still divides
; with div.rn.f64 (unlike the f32 variants, no approximate division appears),
; and contains no infinite-divisor select.
define double @frem_f64_fast(double %a, double %b) {
; CHECK-LABEL: frem_f64_fast(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_fast_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_fast_param_1];
; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
; CHECK-NEXT: neg.f64 %rd5, %rd4;
; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
  %r = frem afn ninf double %a, %b
  ret double %r
}
|
|
|
|
; frem on half with only the `ninf` flag. The expected PTX widens to f32 and
; keeps the full-precision div.rn.f32, but contains no infinite-divisor
; select (no testp/selp) before narrowing the fma result back to f16.
define half @frem_f16_ninf(half %a, half %b) {
; CHECK-LABEL: frem_f16_ninf(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1];
; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
; CHECK-NEXT: ret;
  %r = frem ninf half %a, %b
  ret half %r
}
|
|
|
|
; frem on half with the `afn ninf` flags. The expected PTX widens to f32,
; uses div.approx.f32 for the quotient, and contains no infinite-divisor
; select before narrowing the fma result back to f16.
define half @frem_f16_ninf_fast(half %a, half %b) {
; CHECK-LABEL: frem_f16_ninf_fast(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_fast_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_fast_param_1];
; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
; CHECK-NEXT: ret;
  %r = frem afn ninf half %a, %b
  ret half %r
}
|
|
|
|
; frem on float with only the `ninf` flag. The expected PTX keeps the
; full-precision div.rn.f32 but contains no infinite-divisor select.
define float @frem_f32_ninf(float %a, float %b) {
; CHECK-LABEL: frem_f32_ninf(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1];
; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %r = frem ninf float %a, %b
  ret float %r
}
|
|
|
|
; frem on float with the `afn ninf` flags. The expected PTX uses
; div.approx.f32 for the quotient and contains no infinite-divisor select.
define float @frem_f32_ninf_fast(float %a, float %b) {
; CHECK-LABEL: frem_f32_ninf_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_fast_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_fast_param_1];
; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %r = frem afn ninf float %a, %b
  ret float %r
}
|
|
|
|
; frem on double with only the `ninf` flag. The expected PTX divides with
; div.rn.f64 and contains no infinite-divisor select.
define double @frem_f64_ninf(double %a, double %b) {
; CHECK-LABEL: frem_f64_ninf(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1];
; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
; CHECK-NEXT: neg.f64 %rd5, %rd4;
; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
  %r = frem ninf double %a, %b
  ret double %r
}
|
|
|
|
; frem on double with the `afn ninf` flags. The expected PTX still divides
; with div.rn.f64 and contains no infinite-divisor select.
define double @frem_f64_ninf_fast(double %a, double %b) {
; CHECK-LABEL: frem_f64_ninf_fast(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_fast_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_fast_param_1];
; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
; CHECK-NEXT: neg.f64 %rd5, %rd4;
; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
  %r = frem afn ninf double %a, %b
  ret double %r
}
|
|
|
|
; frem on float with the `arcp` flag and a constant divisor of 7.0. The
; expected PTX replaces the division with a multiply by the constant
; 0f3E124925 (approximately 1/7) and folds the negated divisor into the fma
; as 0fC0E00000 (-7.0); no separate neg or infinite-divisor select appears.
define float @frem_f32_imm1_fast(float %a) {
; CHECK-LABEL: frem_f32_imm1_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
; CHECK-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925;
; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2;
; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %r = frem arcp float %a, 7.0
  ret float %r
}
|
|
; frem on float with no fast-math flags and a constant divisor of 7.0. The
; expected PTX divides by the immediate 0f40E00000 (7.0) with div.rn.f32 and
; folds the negated divisor into the fma as 0fC0E00000 (-7.0); no separate
; neg or infinite-divisor select appears.
define float @frem_f32_imm1_normal(float %a) {
; CHECK-LABEL: frem_f32_imm1_normal(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
; CHECK-NEXT: div.rn.f32 %r2, %r1, 0f40E00000;
; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2;
; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %r = frem float %a, 7.0
  ret float %r
}
|
|
|
|
; frem on float with no fast-math flags, a constant dividend of 7.0 and a
; variable divisor. The expected PTX materializes 0f40E00000 (7.0) with mov,
; divides with div.rn.f32, and selects the constant dividend when the divisor
; is infinite (testp.infinite + selp).
define float @frem_f32_imm2(float %a) {
; CHECK-LABEL: frem_f32_imm2(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0];
; CHECK-NEXT: mov.b32 %r2, 0f40E00000;
; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
; CHECK-NEXT: testp.infinite.f32 %p1, %r1;
; CHECK-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %r = frem float 7.0, %a
  ret float %r
}
|
|
|
|
; frem on float with the `afn ninf` flags, a constant dividend of 7.0 and a
; variable divisor. The expected PTX materializes 0f40E00000 (7.0) with mov,
; divides with div.approx.f32, and contains no infinite-divisor select.
define float @frem_f32_imm2_fast(float %a) {
; CHECK-LABEL: frem_f32_imm2_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_fast_param_0];
; CHECK-NEXT: mov.b32 %r2, 0f40E00000;
; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; CHECK-NEXT: neg.f32 %r5, %r4;
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %r = frem afn ninf float 7.0, %a
  ret float %r
}
|