; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}

target triple = "nvptx64-nvidia-cuda"

; Checks that llvm intrinsics for math functions are correctly lowered to PTX.
;
; Three llc configurations are verified, each with its own prefix:
;   CHECK-NOF16       default target options
;   CHECK-F16         -mcpu=sm_80 -mattr +ptx70
;   CHECK-SM80-NOF16  -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math
; Directives that use the plain CHECK prefix must hold in all three.
;
; The assertions below are autogenerated (see the NOTE on the first line);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand. Functions carrying attribute group #1 are the *_ftz variants, whose
; expected output uses .ftz-qualified instructions. The definitions of
; attribute groups #0 and #1 are not in this part of the file.

declare float @llvm.ceil.f32(float) #0
declare double @llvm.ceil.f64(double) #0
declare float @llvm.floor.f32(float) #0
declare double @llvm.floor.f64(double) #0
declare float @llvm.round.f32(float) #0
declare double @llvm.round.f64(double) #0
declare float @llvm.nearbyint.f32(float) #0
declare double @llvm.nearbyint.f64(double) #0
declare float @llvm.rint.f32(float) #0
declare double @llvm.rint.f64(double) #0
declare float @llvm.roundeven.f32(float) #0
declare double @llvm.roundeven.f64(double) #0
declare float @llvm.trunc.f32(float) #0
declare double @llvm.trunc.f64(double) #0
declare float @llvm.fabs.f32(float) #0
declare double @llvm.fabs.f64(double) #0
declare half @llvm.minnum.f16(half, half) #0
declare float @llvm.minnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.minimum.f16(half, half) #0
declare float @llvm.minimum.f32(float, float) #0
declare double @llvm.minimum.f64(double, double) #0
declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.maxnum.f16(half, half) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #0
declare half @llvm.maximum.f16(half, half) #0
declare float @llvm.maximum.f32(float, float) #0
declare double @llvm.maximum.f64(double, double) #0
declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) #0
declare float @llvm.fma.f32(float, float, float) #0
declare double @llvm.fma.f64(double, double, double) #0

; ---- ceil ----
; Expected lowering: a single cvt with the .rpi rounding modifier.

define float @ceil_float(float %a) {
; CHECK-LABEL: ceil_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [ceil_float_param_0];
; CHECK-NEXT:    cvt.rpi.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.ceil.f32(float %a)
  ret float %b
}

define float @ceil_float_ftz(float %a) #1 {
; CHECK-LABEL: ceil_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [ceil_float_ftz_param_0];
; CHECK-NEXT:    cvt.rpi.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.ceil.f32(float %a)
  ret float %b
}

define double @ceil_double(double %a) {
; CHECK-LABEL: ceil_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [ceil_double_param_0];
; CHECK-NEXT:    cvt.rpi.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.ceil.f64(double %a)
  ret double %b
}

; ---- floor ----
; Expected lowering: a single cvt with the .rmi rounding modifier.

define float @floor_float(float %a) {
; CHECK-LABEL: floor_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [floor_float_param_0];
; CHECK-NEXT:    cvt.rmi.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.floor.f32(float %a)
  ret float %b
}

define float @floor_float_ftz(float %a) #1 {
; CHECK-LABEL: floor_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [floor_float_ftz_param_0];
; CHECK-NEXT:    cvt.rmi.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.floor.f32(float %a)
  ret float %b
}

define double @floor_double(double %a) {
; CHECK-LABEL: floor_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [floor_double_param_0];
; CHECK-NEXT:    cvt.rmi.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.floor.f64(double %a)
  ret double %b
}

; ---- round ----
; Unlike the other rounding intrinsics in this file, round is expected to be
; expanded into a multi-instruction sequence rather than a single cvt.

define float @round_float(float %a) {
; check the use of sign mask and 0.5 to implement round
; CHECK-LABEL: round_float(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [round_float_param_0];
; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
; CHECK-NEXT:    add.rn.f32 %r4, %r1, %r3;
; CHECK-NEXT:    cvt.rzi.f32.f32 %r5, %r4;
; CHECK-NEXT:    abs.f32 %r6, %r1;
; CHECK-NEXT:    setp.gt.f32 %p1, %r6, 0f4B000000;
; CHECK-NEXT:    selp.f32 %r7, %r1, %r5, %p1;
; CHECK-NEXT:    cvt.rzi.f32.f32 %r8, %r1;
; CHECK-NEXT:    setp.lt.f32 %p2, %r6, 0f3F000000;
; CHECK-NEXT:    selp.f32 %r9, %r8, %r7, %p2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r9;
; CHECK-NEXT:    ret;
  %b = call float @llvm.round.f32(float %a)
  ret float %b
}

define float @round_float_ftz(float %a) #1 {
; check the use of sign mask and 0.5 to implement round
; CHECK-LABEL: round_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [round_float_ftz_param_0];
; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
; CHECK-NEXT:    add.rn.ftz.f32 %r4, %r1, %r3;
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %r5, %r4;
; CHECK-NEXT:    abs.ftz.f32 %r6, %r1;
; CHECK-NEXT:    setp.gt.ftz.f32 %p1, %r6, 0f4B000000;
; CHECK-NEXT:    selp.f32 %r7, %r1, %r5, %p1;
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %r8, %r1;
; CHECK-NEXT:    setp.lt.ftz.f32 %p2, %r6, 0f3F000000;
; CHECK-NEXT:    selp.f32 %r9, %r8, %r7, %p2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r9;
; CHECK-NEXT:    ret;
  %b = call float @llvm.round.f32(float %a)
  ret float %b
}

define double @round_double(double %a) {
; check the use of 0.5 to implement round
; CHECK-LABEL: round_double(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [round_double_param_0];
; CHECK-NEXT:    abs.f64 %rd2, %rd1;
; CHECK-NEXT:    setp.lt.f64 %p1, %rd2, 0d3FE0000000000000;
; CHECK-NEXT:    add.rn.f64 %rd3, %rd2, 0d3FE0000000000000;
; CHECK-NEXT:    cvt.rzi.f64.f64 %rd4, %rd3;
; CHECK-NEXT:    selp.f64 %rd5, 0d0000000000000000, %rd4, %p1;
; CHECK-NEXT:    copysign.f64 %rd6, %rd1, %rd5;
; CHECK-NEXT:    setp.gt.f64 %p2, %rd2, 0d4330000000000000;
; CHECK-NEXT:    selp.f64 %rd7, %rd1, %rd6, %p2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT:    ret;
  %b = call double @llvm.round.f64(double %a)
  ret double %b
}

; ---- nearbyint ----
; nearbyint, rint and roundeven below all expect the same cvt.rni lowering.

define float @nearbyint_float(float %a) {
; CHECK-LABEL: nearbyint_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [nearbyint_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.nearbyint.f32(float %a)
  ret float %b
}

define float @nearbyint_float_ftz(float %a) #1 {
; CHECK-LABEL: nearbyint_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [nearbyint_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.nearbyint.f32(float %a)
  ret float %b
}

define double @nearbyint_double(double %a) {
; CHECK-LABEL: nearbyint_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [nearbyint_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.nearbyint.f64(double %a)
  ret double %b
}

; ---- rint ----

define float @rint_float(float %a) {
; CHECK-LABEL: rint_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [rint_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.rint.f32(float %a)
  ret float %b
}

define float @rint_float_ftz(float %a) #1 {
; CHECK-LABEL: rint_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [rint_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.rint.f32(float %a)
  ret float %b
}

define double @rint_double(double %a) {
; CHECK-LABEL: rint_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [rint_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.rint.f64(double %a)
  ret double %b
}

; ---- roundeven ----

define float @roundeven_float(float %a) {
; CHECK-LABEL: roundeven_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [roundeven_float_param_0];
; CHECK-NEXT:    cvt.rni.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.roundeven.f32(float %a)
  ret float %b
}

define float @roundeven_float_ftz(float %a) #1 {
; CHECK-LABEL: roundeven_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [roundeven_float_ftz_param_0];
; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.roundeven.f32(float %a)
  ret float %b
}

define double @roundeven_double(double %a) {
; CHECK-LABEL: roundeven_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [roundeven_double_param_0];
; CHECK-NEXT:    cvt.rni.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.roundeven.f64(double %a)
  ret double %b
}

; ---- trunc ----
; Expected lowering: a single cvt with the .rzi rounding modifier.

define float @trunc_float(float %a) {
; CHECK-LABEL: trunc_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [trunc_float_param_0];
; CHECK-NEXT:    cvt.rzi.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.trunc.f32(float %a)
  ret float %b
}

define float @trunc_float_ftz(float %a) #1 {
; CHECK-LABEL: trunc_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [trunc_float_ftz_param_0];
; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.trunc.f32(float %a)
  ret float %b
}

define double @trunc_double(double %a) {
; CHECK-LABEL: trunc_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [trunc_double_param_0];
; CHECK-NEXT:    cvt.rzi.f64.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.trunc.f64(double %a)
  ret double %b
}

; ---- abs ----
; Exercises the llvm.fabs intrinsic; expected lowering is the PTX abs
; instruction.

define float @abs_float(float %a) {
; CHECK-LABEL: abs_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [abs_float_param_0];
; CHECK-NEXT:    abs.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.fabs.f32(float %a)
  ret float %b
}

define float @abs_float_ftz(float %a) #1 {
; CHECK-LABEL: abs_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [abs_float_ftz_param_0];
; CHECK-NEXT:    abs.ftz.f32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = call float @llvm.fabs.f32(float %a)
  ret float %b
}

define double @abs_double(double %a) {
; CHECK-LABEL: abs_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [abs_double_param_0];
; CHECK-NEXT:    abs.f64 %rd2, %rd1;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = call double @llvm.fabs.f64(double %a)
  ret double %b
}

; ---- minnum ----
; For half and <2 x half>, only the f16-enabled sm_80 run expects a native
; min.f16 / min.f16x2; the other two runs expect the operands to be converted
; to f32, combined with min.f32 and converted back.

define half @minnum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: minnum_half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-NOF16-NEXT:    min.f32 %r3, %r2, %r1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %r3;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minnum_half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-F16-NEXT:    min.f16 %rs3, %rs1, %rs2;
; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minnum_half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-SM80-NOF16-NEXT:    min.f32 %r3, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %r3;
; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call half @llvm.minnum.f16(half %a, half %b)
  ret half %x
}

define float @minnum_float(float %a, float %b) {
; CHECK-LABEL: minnum_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [minnum_float_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [minnum_float_param_1];
; CHECK-NEXT:    min.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float %b)
  ret float %x
}

define float @minnum_imm1(float %a) {
; CHECK-LABEL: minnum_imm1(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [minnum_imm1_param_0];
; CHECK-NEXT:    min.f32 %r2, %r1, 0f00000000;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float 0.0)
  ret float %x
}

define float @minnum_imm2(float %a) {
; CHECK-LABEL: minnum_imm2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [minnum_imm2_param_0];
; CHECK-NEXT:    min.f32 %r2, %r1, 0f00000000;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float 0.0, float %a)
  ret float %x
}

define float @minnum_float_ftz(float %a, float %b) #1 {
; CHECK-LABEL: minnum_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [minnum_float_ftz_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [minnum_float_ftz_param_1];
; CHECK-NEXT:    min.ftz.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.minnum.f32(float %a, float %b)
  ret float %x
}

define double @minnum_double(double %a, double %b) {
; CHECK-LABEL: minnum_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [minnum_double_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [minnum_double_param_1];
; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %x = call double @llvm.minnum.f64(double %a, double %b)
  ret double %x
}

define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: minnum_v2half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minnum_v2half_param_0];
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minnum_v2half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT:    min.f32 %r3, %r2, %r1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %r3;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
; CHECK-NOF16-NEXT:    min.f32 %r6, %r5, %r4;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %r6;
; CHECK-NOF16-NEXT:    mov.b32 %r7, {%rs6, %rs5};
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minnum_v2half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_0];
; CHECK-F16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_1];
; CHECK-F16-NEXT:    min.f16x2 %r3, %r1, %r2;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minnum_v2half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<8>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minnum_v2half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minnum_v2half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
; CHECK-SM80-NOF16-NEXT:    min.f32 %r3, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %r3;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
; CHECK-SM80-NOF16-NEXT:    min.f32 %r6, %r5, %r4;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %r6;
; CHECK-SM80-NOF16-NEXT:    mov.b32 %r7, {%rs6, %rs5};
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %x
}

; ---- minimum ----
; Where a min.NaN form is expected (f32 in both sm_80 runs; f16 and f16x2 only
; in the f16-enabled sm_80 run) the lowering is a single instruction. In every
; other case, including f64 in all runs, the expected output is an explicit
; setp/selp sequence that selects a NaN constant when setp.nan holds and
; compares the operands bitwise against the sign-bit-only pattern.

define half @minimum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: minimum_half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-F16-NEXT:    min.NaN.f16 %rs3, %rs1, %rs2;
; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call half @llvm.minimum.f16(half %a, half %b)
  ret half %x
}

define float @minimum_float(float %a, float %b) {
; CHECK-NOF16-LABEL: minimum_float(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_float(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
; CHECK-F16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
; CHECK-F16-NEXT:    min.NaN.f32 %r3, %r1, %r2;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_float(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %r3, %r1, %r2;
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float %a, float %b)
  ret float %x
}

define float @minimum_imm1(float %a) {
; CHECK-NOF16-LABEL: minimum_imm1(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_imm1(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<3>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
; CHECK-F16-NEXT:    min.NaN.f32 %r2, %r1, 0f00000000;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_imm1(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<3>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %r2, %r1, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float %a, float 0.0)
  ret float %x
}

define float @minimum_imm2(float %a) {
; CHECK-NOF16-LABEL: minimum_imm2(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_imm2(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<3>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
; CHECK-F16-NEXT:    min.NaN.f32 %r2, %r1, 0f00000000;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_imm2(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<3>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %r2, %r1, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float 0.0, float %a)
  ret float %x
}

define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-LABEL: minimum_float_ftz(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT:    min.ftz.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_float_ftz(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
; CHECK-F16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
; CHECK-F16-NEXT:    min.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_float_ftz(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
; CHECK-SM80-NOF16-NEXT:    min.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call float @llvm.minimum.f32(float %a, float %b)
  ret float %x
}

define double @minimum_double(double %a, double %b) {
; CHECK-LABEL: minimum_double(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<5>;
; CHECK-NEXT:    .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [minimum_double_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [minimum_double_param_1];
; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd1, -9223372036854775808;
; CHECK-NEXT:    selp.f64 %rd5, %rd1, %rd4, %p2;
; CHECK-NEXT:    setp.eq.b64 %p3, %rd2, -9223372036854775808;
; CHECK-NEXT:    selp.f64 %rd6, %rd2, %rd5, %p3;
; CHECK-NEXT:    setp.eq.f64 %p4, %rd4, 0d0000000000000000;
; CHECK-NEXT:    selp.f64 %rd7, %rd6, %rd4, %p4;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT:    ret;
  %x = call double @llvm.minimum.f64(double %a, double %b)
  ret double %x
}

define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: minimum_v2half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .pred %p<11>;
; CHECK-NOF16-NEXT:    .reg .b16 %rs<15>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minimum_v2half_param_0];
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minimum_v2half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
; CHECK-NOF16-NEXT:    setp.lt.f32 %p6, %r5, %r4;
; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
; CHECK-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
; CHECK-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: minimum_v2half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b32 %r<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_v2half_param_0];
; CHECK-F16-NEXT:    ld.param.b32 %r2, [minimum_v2half_param_1];
; CHECK-F16-NEXT:    min.NaN.f16x2 %r3, %r1, %r2;
; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: minimum_v2half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<11>;
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<15>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<7>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [minimum_v2half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [minimum_v2half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r4, %rs3;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r5, %rs1;
; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p6, %r5, %r4;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
; CHECK-SM80-NOF16-NEXT:    st.param.v2.b16 [func_retval0], {%rs14, %rs9};
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %x
}

; ---- maxnum ----

define half @maxnum_half(half %a, half %b) {
; CHECK-NOF16-LABEL: maxnum_half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-NOF16-NEXT:    max.f32 %r3, %r2, %r1;
; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %r3;
; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-NOF16-NEXT:    ret;
;
; CHECK-F16-LABEL: maxnum_half(
; CHECK-F16:       {
; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT:  // %bb.0:
; CHECK-F16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
; CHECK-F16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
; CHECK-F16-NEXT:    max.f16 %rs3, %rs1, %rs2;
; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-F16-NEXT:    ret;
;
; CHECK-SM80-NOF16-LABEL: maxnum_half(
; CHECK-SM80-NOF16:       {
; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<4>;
; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
; CHECK-SM80-NOF16-EMPTY:
; CHECK-SM80-NOF16-NEXT:  // %bb.0:
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
; CHECK-SM80-NOF16-NEXT:    max.f32 %r3, %r2, %r1;
; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %r3;
; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
; CHECK-SM80-NOF16-NEXT:    ret;
  %x = call half @llvm.maxnum.f16(half %a, half %b)
  ret half %x
}

define float @maxnum_imm1(float %a) {
; CHECK-LABEL: maxnum_imm1(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [maxnum_imm1_param_0];
; CHECK-NEXT:    max.f32 %r2, %r1, 0f00000000;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.maxnum.f32(float %a, float 0.0)
  ret float %x
}

define float @maxnum_imm2(float %a) {
; CHECK-LABEL: maxnum_imm2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [maxnum_imm2_param_0];
; CHECK-NEXT:    max.f32 %r2, %r1, 0f00000000;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %x = call float @llvm.maxnum.f32(float 0.0, float %a)
  ret float %x
}

define float @maxnum_float(float %a, float %b) {
; CHECK-LABEL: maxnum_float(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [maxnum_float_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [maxnum_float_param_1];
; CHECK-NEXT:    max.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %x
}

define float @maxnum_float_ftz(float %a, float %b) #1 {
; CHECK-LABEL: maxnum_float_ftz(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b32 %r1, [maxnum_float_ftz_param_0];
; CHECK-NEXT:    ld.param.b32 %r2, [maxnum_float_ftz_param_1];
; CHECK-NEXT:    max.ftz.f32 %r3, %r1, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %x = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %x
}

define double @maxnum_double(double %a, double %b) {
; CHECK-LABEL: maxnum_double(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.b64 %rd1, [maxnum_double_param_0];
; CHECK-NEXT:    ld.param.b64 %rd2, [maxnum_double_param_1];
; CHECK-NEXT:    max.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT:    ret;
  %x = call double @llvm.maxnum.f64(double %a, double %b)
  ret double %x
}

define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-LABEL: maxnum_v2half(
; CHECK-NOF16:       {
; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT:  // %bb.0:
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs1, %rs2}, [maxnum_v2half_param_0];
; CHECK-NOF16-NEXT:    ld.param.v2.b16 {%rs3, %rs4}, [maxnum_v2half_param_1];
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs4;
; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT:    max.f32 %r3, %r2,
%r1; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; ; CHECK-NOF16-NEXT: max.f32 %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6; ; CHECK-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maxnum_v2half( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b32 %r<4>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_1]; ; CHECK-F16-NEXT: max.f16x2 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maxnum_v2half( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<8>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maxnum_v2half_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maxnum_v2half_param_1]; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-SM80-NOF16-NEXT: max.f32 %r3, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; ; CHECK-SM80-NOF16-NEXT: max.f32 %r6, %r5, %r4; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6; ; CHECK-SM80-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5}; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x } ; ---- maximum ---- define half @maximum_half(half %a, half %b) { ; CHECK-NOF16-LABEL: maximum_half( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<6>; ; CHECK-NOF16-NEXT: .reg .b16 %rs<8>; ; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: 
; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; ; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; ; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; ; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; ; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; ; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_half( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b16 %rs<4>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; ; CHECK-F16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; ; CHECK-F16-NEXT: max.NaN.f16 %rs3, %rs1, %rs2; ; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_half( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: 
selp.b16 %rs5, %rs1, %rs4, %p3; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; ; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.maximum.f16(half %a, half %b) ret half %x } define float @maximum_imm1(float %a) { ; CHECK-NOF16-LABEL: maximum_imm1( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<3>; ; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm1( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b32 %r<3>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0]; ; CHECK-F16-NEXT: max.NaN.f32 %r2, %r1, 0f00000000; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_imm1( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<3>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %r2, %r1, 0f00000000; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float 0.0) ret float %x } define float @maximum_imm2(float %a) { ; CHECK-NOF16-LABEL: maximum_imm2( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<3>; ; 
CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm2( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b32 %r<3>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0]; ; CHECK-F16-NEXT: max.NaN.f32 %r2, %r1, 0f00000000; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_imm2( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<3>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %r2, %r1, 0f00000000; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float 0.0, float %a) ret float %x } define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-LABEL: maximum_float( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<5>; ; CHECK-NOF16-NEXT: .reg .b32 %r<8>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_param_0]; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_param_1]; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; ; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; ; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 
0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b32 %r<4>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_float_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_float_param_1]; ; CHECK-F16-NEXT: max.NaN.f32 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_float( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_param_1]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %r3, %r1, %r2; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float %b) ret float %x } define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-LABEL: maximum_float_ftz( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<5>; ; CHECK-NOF16-NEXT: .reg .b32 %r<8>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; ; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; ; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float_ftz( ; CHECK-F16: { ; 
CHECK-F16-NEXT: .reg .b32 %r<4>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; ; CHECK-F16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_float_ftz( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; ; CHECK-SM80-NOF16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float %b) ret float %x } define double @maximum_double(double %a, double %b) { ; CHECK-LABEL: maximum_double( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<5>; ; CHECK-NEXT: .reg .b64 %rd<8>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [maximum_double_param_0]; ; CHECK-NEXT: ld.param.b64 %rd2, [maximum_double_param_1]; ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; ; CHECK-NEXT: setp.eq.b64 %p2, %rd1, 0; ; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; ; CHECK-NEXT: setp.eq.b64 %p3, %rd2, 0; ; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; ; CHECK-NEXT: ret; %x = call double @llvm.maximum.f64(double %a, double %b) ret double %x } define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-LABEL: maximum_v2half( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<11>; ; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; 
CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximum_v2half_param_0]; ; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximum_v2half_param_1]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; ; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; ; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; ; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %r5, %r4; ; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; ; CHECK-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; ; CHECK-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs14, %rs9}; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_v2half( ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b32 %r<4>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; ; CHECK-F16-NEXT: max.NaN.f16x2 %r3, %r1, %r2; ; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_v2half( ; CHECK-SM80-NOF16: { ; 
CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximum_v2half_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximum_v2half_param_1]; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs6; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %r5, %r4; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8; ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs11; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; ; CHECK-SM80-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs14, %rs9}; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x } ; ---- fma ---- define float @fma_float(float 
%a, float %b, float %c) { ; CHECK-LABEL: fma_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [fma_float_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [fma_float_param_1]; ; CHECK-NEXT: ld.param.b32 %r3, [fma_float_param_2]; ; CHECK-NEXT: fma.rn.f32 %r4, %r1, %r2, %r3; ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x } define float @fma_float_ftz(float %a, float %b, float %c) #1 { ; CHECK-LABEL: fma_float_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [fma_float_ftz_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [fma_float_ftz_param_1]; ; CHECK-NEXT: ld.param.b32 %r3, [fma_float_ftz_param_2]; ; CHECK-NEXT: fma.rn.ftz.f32 %r4, %r1, %r2, %r3; ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x } define double @fma_double(double %a, double %b, double %c) { ; CHECK-LABEL: fma_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [fma_double_param_0]; ; CHECK-NEXT: ld.param.b64 %rd2, [fma_double_param_1]; ; CHECK-NEXT: ld.param.b64 %rd3, [fma_double_param_2]; ; CHECK-NEXT: fma.rn.f64 %rd4, %rd1, %rd2, %rd3; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %x = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %x } attributes #0 = { nounwind readnone } attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" }