
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -O3 | FileCheck %s --check-prefixes=CHECK,SM90
; RUN: llc < %s -mcpu=sm_20 -O3 | FileCheck %s --check-prefixes=CHECK,SM20
; The sm_90 ptxas verification has to pass -arch=sm_90 explicitly. Without it
; ptxas rejects the generated PTX with:
;   ptxas fatal : SM version specified by .target is higher than default SM version assumed
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx80 -O3 | %ptxas-verify -arch=sm_90 %}
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_20 -O3 | %ptxas-verify %}

target triple = "nvptx64-nvidia-cuda"

; *************************************
; * Cases with no min/max (the select folds to one of its operands)

; (a == b) ? a : b always yields b, so neither a compare nor a min/max is
; expected; the PTX simply forwards parameter 1 to the return value.
define i32 @ab_eq_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_eq_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_eq_i32_param_1];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %same = icmp eq i32 %a, %b
  %out = select i1 %same, i32 %a, i32 %b
  ret i32 %out
}

; (a != b) ? b : a always yields b as well, so again only parameter 1 is
; loaded and returned, with no compare or min/max.
define i64 @ab_ne_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_ne_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_ne_i64_param_1];
; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
; CHECK-NEXT: ret;
  %differ = icmp ne i64 %a, %b
  %out = select i1 %differ, i64 %b, i64 %a
  ret i64 %out
}

; *************************************
; * All variations with i16

; *** ab (select cmp, a, b), unsigned, i16
; select(a >u b, a, b) is umax(a, b); a lone max.u16 is expected.
define i16 @ab_ugt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_ugt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_ugt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_ugt_i16_param_1];
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ugt i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a >=u b, a, b) is umax(a, b); a lone max.u16 is expected.
define i16 @ab_uge_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_uge_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_uge_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_uge_i16_param_1];
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp uge i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a <u b, a, b) is umin(a, b); a lone min.u16 is expected.
define i16 @ab_ult_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_ult_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_ult_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_ult_i16_param_1];
; CHECK-NEXT: min.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ult i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a <=u b, a, b) is umin(a, b); a lone min.u16 is expected.
define i16 @ab_ule_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_ule_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_ule_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_ule_i16_param_1];
; CHECK-NEXT: min.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ule i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; *** ab (select cmp, a, b), signed, i16
; select(a >s b, a, b) is smax(a, b); a lone max.s16 is expected.
define i16 @ab_sgt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_sgt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_sgt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_sgt_i16_param_1];
; CHECK-NEXT: max.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sgt i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a >=s b, a, b) is smax(a, b); a lone max.s16 is expected.
define i16 @ab_sge_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_sge_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_sge_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_sge_i16_param_1];
; CHECK-NEXT: max.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sge i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a <s b, a, b) is smin(a, b); a lone min.s16 is expected.
define i16 @ab_slt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_slt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_slt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_slt_i16_param_1];
; CHECK-NEXT: min.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp slt i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; select(a <=s b, a, b) is smin(a, b); a lone min.s16 is expected.
define i16 @ab_sle_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ab_sle_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ab_sle_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ab_sle_i16_param_1];
; CHECK-NEXT: min.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sle i16 %a, %b
  %out = select i1 %pred, i16 %a, i16 %b
  ret i16 %out
}

; *** ba (select cmp, b, a), unsigned, i16
; select(a >u b, b, a) is umin(a, b); a lone min.u16 is expected.
define i16 @ba_ugt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_ugt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_ugt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_ugt_i16_param_1];
; CHECK-NEXT: min.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ugt i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a >=u b, b, a) is umin(a, b); a lone min.u16 is expected.
define i16 @ba_uge_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_uge_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_uge_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_uge_i16_param_1];
; CHECK-NEXT: min.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp uge i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a <u b, b, a) is umax(a, b); a lone max.u16 is expected.
define i16 @ba_ult_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_ult_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_ult_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_ult_i16_param_1];
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ult i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a <=u b, b, a) is umax(a, b); a lone max.u16 is expected.
define i16 @ba_ule_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_ule_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_ule_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_ule_i16_param_1];
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp ule i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; *** ba (select cmp, b, a), signed, i16
; select(a >s b, b, a) is smin(a, b); a lone min.s16 is expected.
define i16 @ba_sgt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_sgt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_sgt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_sgt_i16_param_1];
; CHECK-NEXT: min.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sgt i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a >=s b, b, a) is smin(a, b); a lone min.s16 is expected.
define i16 @ba_sge_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_sge_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_sge_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_sge_i16_param_1];
; CHECK-NEXT: min.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sge i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a <s b, b, a) is smax(a, b); a lone max.s16 is expected.
define i16 @ba_slt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_slt_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_slt_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_slt_i16_param_1];
; CHECK-NEXT: max.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp slt i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; select(a <=s b, b, a) is smax(a, b); a lone max.s16 is expected.
define i16 @ba_sle_i16(i16 %a, i16 %b) {
; CHECK-LABEL: ba_sle_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [ba_sle_i16_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [ba_sle_i16_param_1];
; CHECK-NEXT: max.s16 %rs3, %rs1, %rs2;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %pred = icmp sle i16 %a, %b
  %out = select i1 %pred, i16 %b, i16 %a
  ret i16 %out
}

; *************************************
; * All variations with i32

; *** ab (select cmp, a, b), unsigned, i32
; select(a >u b, a, b) is umax(a, b); a lone max.u32 is expected.
define i32 @ab_ugt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_ugt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_ugt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_ugt_i32_param_1];
; CHECK-NEXT: max.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ugt i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a >=u b, a, b) is umax(a, b); a lone max.u32 is expected.
define i32 @ab_uge_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_uge_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_uge_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_uge_i32_param_1];
; CHECK-NEXT: max.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp uge i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a <u b, a, b) is umin(a, b); a lone min.u32 is expected.
define i32 @ab_ult_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_ult_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_ult_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_ult_i32_param_1];
; CHECK-NEXT: min.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ult i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a <=u b, a, b) is umin(a, b); a lone min.u32 is expected.
define i32 @ab_ule_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_ule_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_ule_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_ule_i32_param_1];
; CHECK-NEXT: min.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ule i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; *** ab (select cmp, a, b), signed, i32
; select(a >s b, a, b) is smax(a, b); a lone max.s32 is expected.
define i32 @ab_sgt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_sgt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_sgt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_sgt_i32_param_1];
; CHECK-NEXT: max.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sgt i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a >=s b, a, b) is smax(a, b); a lone max.s32 is expected.
define i32 @ab_sge_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_sge_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_sge_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_sge_i32_param_1];
; CHECK-NEXT: max.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sge i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a <s b, a, b) is smin(a, b); a lone min.s32 is expected.
define i32 @ab_slt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_slt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_slt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_slt_i32_param_1];
; CHECK-NEXT: min.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp slt i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; select(a <=s b, a, b) is smin(a, b); a lone min.s32 is expected.
define i32 @ab_sle_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ab_sle_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ab_sle_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ab_sle_i32_param_1];
; CHECK-NEXT: min.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sle i32 %a, %b
  %out = select i1 %pred, i32 %a, i32 %b
  ret i32 %out
}

; *** ba (select cmp, b, a), unsigned, i32
; select(a >u b, b, a) is umin(a, b); a lone min.u32 is expected.
define i32 @ba_ugt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_ugt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_ugt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_ugt_i32_param_1];
; CHECK-NEXT: min.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ugt i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a >=u b, b, a) is umin(a, b); a lone min.u32 is expected.
define i32 @ba_uge_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_uge_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_uge_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_uge_i32_param_1];
; CHECK-NEXT: min.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp uge i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a <u b, b, a) is umax(a, b); a lone max.u32 is expected.
define i32 @ba_ult_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_ult_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_ult_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_ult_i32_param_1];
; CHECK-NEXT: max.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ult i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a <=u b, b, a) is umax(a, b); a lone max.u32 is expected.
define i32 @ba_ule_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_ule_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_ule_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_ule_i32_param_1];
; CHECK-NEXT: max.u32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp ule i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; *** ba (select cmp, b, a), signed, i32
; select(a >s b, b, a) is smin(a, b); a lone min.s32 is expected.
define i32 @ba_sgt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_sgt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_sgt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_sgt_i32_param_1];
; CHECK-NEXT: min.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sgt i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a >=s b, b, a) is smin(a, b); a lone min.s32 is expected.
define i32 @ba_sge_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_sge_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_sge_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_sge_i32_param_1];
; CHECK-NEXT: min.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sge i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a <s b, b, a) is smax(a, b); a lone max.s32 is expected.
define i32 @ba_slt_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_slt_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_slt_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_slt_i32_param_1];
; CHECK-NEXT: max.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp slt i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; select(a <=s b, b, a) is smax(a, b); a lone max.s32 is expected.
define i32 @ba_sle_i32(i32 %a, i32 %b) {
; CHECK-LABEL: ba_sle_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [ba_sle_i32_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [ba_sle_i32_param_1];
; CHECK-NEXT: max.s32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %pred = icmp sle i32 %a, %b
  %out = select i1 %pred, i32 %b, i32 %a
  ret i32 %out
}

; *************************************
; * All variations with i64

; *** ab (select cmp, a, b), unsigned, i64
; select(a >u b, a, b) is umax(a, b); a lone max.u64 is expected.
define i64 @ab_ugt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_ugt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_ugt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_ugt_i64_param_1];
; CHECK-NEXT: max.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ugt i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a >=u b, a, b) is umax(a, b); a lone max.u64 is expected.
define i64 @ab_uge_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_uge_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_uge_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_uge_i64_param_1];
; CHECK-NEXT: max.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp uge i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a <u b, a, b) is umin(a, b); a lone min.u64 is expected.
define i64 @ab_ult_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_ult_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_ult_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_ult_i64_param_1];
; CHECK-NEXT: min.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ult i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a <=u b, a, b) is umin(a, b); a lone min.u64 is expected.
define i64 @ab_ule_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_ule_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_ule_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_ule_i64_param_1];
; CHECK-NEXT: min.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ule i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; *** ab (select cmp, a, b), signed, i64
; select(a >s b, a, b) is smax(a, b); a lone max.s64 is expected.
define i64 @ab_sgt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_sgt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_sgt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_sgt_i64_param_1];
; CHECK-NEXT: max.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sgt i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a >=s b, a, b) is smax(a, b); a lone max.s64 is expected.
define i64 @ab_sge_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_sge_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_sge_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_sge_i64_param_1];
; CHECK-NEXT: max.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sge i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a <s b, a, b) is smin(a, b); a lone min.s64 is expected.
define i64 @ab_slt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_slt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_slt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_slt_i64_param_1];
; CHECK-NEXT: min.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp slt i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; select(a <=s b, a, b) is smin(a, b); a lone min.s64 is expected.
define i64 @ab_sle_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ab_sle_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ab_sle_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ab_sle_i64_param_1];
; CHECK-NEXT: min.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sle i64 %a, %b
  %out = select i1 %pred, i64 %a, i64 %b
  ret i64 %out
}

; *** ba (select cmp, b, a), unsigned, i64
; select(a >u b, b, a) is umin(a, b); a lone min.u64 is expected.
define i64 @ba_ugt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_ugt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_ugt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_ugt_i64_param_1];
; CHECK-NEXT: min.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ugt i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a >=u b, b, a) is umin(a, b); a lone min.u64 is expected.
define i64 @ba_uge_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_uge_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_uge_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_uge_i64_param_1];
; CHECK-NEXT: min.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp uge i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a <u b, b, a) is umax(a, b); a lone max.u64 is expected.
define i64 @ba_ult_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_ult_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_ult_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_ult_i64_param_1];
; CHECK-NEXT: max.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ult i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a <=u b, b, a) is umax(a, b); a lone max.u64 is expected.
define i64 @ba_ule_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_ule_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_ule_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_ule_i64_param_1];
; CHECK-NEXT: max.u64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp ule i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; *** ba (select cmp, b, a), signed, i64
; select(a >s b, b, a) is smin(a, b); a lone min.s64 is expected.
define i64 @ba_sgt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_sgt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_sgt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_sgt_i64_param_1];
; CHECK-NEXT: min.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sgt i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a >=s b, b, a) is smin(a, b); a lone min.s64 is expected.
define i64 @ba_sge_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_sge_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_sge_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_sge_i64_param_1];
; CHECK-NEXT: min.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sge i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a <s b, b, a) is smax(a, b); a lone max.s64 is expected.
define i64 @ba_slt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_slt_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_slt_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_slt_i64_param_1];
; CHECK-NEXT: max.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp slt i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; select(a <=s b, b, a) is smax(a, b); a lone max.s64 is expected.
define i64 @ba_sle_i64(i64 %a, i64 %b) {
; CHECK-LABEL: ba_sle_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [ba_sle_i64_param_0];
; CHECK-NEXT: ld.param.b64 %rd2, [ba_sle_i64_param_1];
; CHECK-NEXT: max.s64 %rd3, %rd1, %rd2;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
  %pred = icmp sle i64 %a, %b
  %out = select i1 %pred, i64 %b, i64 %a
  ret i64 %out
}

; smax(smin(a, b), 0). The sm_90 / ptx80 run expects this to become a single
; min.relu.s32; the sm_20 run expects min.s32 followed by max.s32 against 0.
; The intrinsics are spelled with the canonical .i32 overload suffix.
define i32 @min_relu_s32(i32 %a, i32 %b) {
; SM90-LABEL: min_relu_s32(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [min_relu_s32_param_0];
; SM90-NEXT: ld.param.b32 %r2, [min_relu_s32_param_1];
; SM90-NEXT: min.relu.s32 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: min_relu_s32(
; SM20: {
; SM20-NEXT: .reg .b32 %r<5>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.b32 %r1, [min_relu_s32_param_0];
; SM20-NEXT: ld.param.b32 %r2, [min_relu_s32_param_1];
; SM20-NEXT: min.s32 %r3, %r1, %r2;
; SM20-NEXT: max.s32 %r4, %r3, 0;
; SM20-NEXT: st.param.b32 [func_retval0], %r4;
; SM20-NEXT: ret;
  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %max = call i32 @llvm.smax.i32(i32 %min, i32 0)
  ret i32 %max
}

; smax(smax(a, b), 0). The sm_90 / ptx80 run expects this to become a single
; max.relu.s32; the sm_20 run expects two max.s32, the second one against 0.
; The intrinsics are spelled with the canonical .i32 overload suffix.
define i32 @max_relu_s32(i32 %a, i32 %b) {
; SM90-LABEL: max_relu_s32(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [max_relu_s32_param_0];
; SM90-NEXT: ld.param.b32 %r2, [max_relu_s32_param_1];
; SM90-NEXT: max.relu.s32 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: max_relu_s32(
; SM20: {
; SM20-NEXT: .reg .b32 %r<5>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.b32 %r1, [max_relu_s32_param_0];
; SM20-NEXT: ld.param.b32 %r2, [max_relu_s32_param_1];
; SM20-NEXT: max.s32 %r3, %r1, %r2;
; SM20-NEXT: max.s32 %r4, %r3, 0;
; SM20-NEXT: st.param.b32 [func_retval0], %r4;
; SM20-NEXT: ret;
  %max1 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %max2 = call i32 @llvm.smax.i32(i32 %max1, i32 0)
  ret i32 %max2
}

; smax(smax(a, 0), b): the clamp against 0 is applied to %a first and the
; result is then maxed with %b. The expected PTX is the same shape as for the
; other association order: one max.relu.s32 on the sm_90 / ptx80 run, two
; max.s32 on the sm_20 run.
; The intrinsics are spelled with the canonical .i32 overload suffix.
define i32 @max_relu_s32_v2(i32 %a, i32 %b) {
; SM90-LABEL: max_relu_s32_v2(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [max_relu_s32_v2_param_0];
; SM90-NEXT: ld.param.b32 %r2, [max_relu_s32_v2_param_1];
; SM90-NEXT: max.relu.s32 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: max_relu_s32_v2(
; SM20: {
; SM20-NEXT: .reg .b32 %r<5>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.b32 %r1, [max_relu_s32_v2_param_0];
; SM20-NEXT: ld.param.b32 %r2, [max_relu_s32_v2_param_1];
; SM20-NEXT: max.s32 %r3, %r1, %r2;
; SM20-NEXT: max.s32 %r4, %r3, 0;
; SM20-NEXT: st.param.b32 [func_retval0], %r4;
; SM20-NEXT: ret;
  %max2 = call i32 @llvm.smax.i32(i32 %a, i32 0)
  %max1 = call i32 @llvm.smax.i32(i32 %max2, i32 %b)
  ret i32 %max1
}

; Vector form of smax(smin(a, b), 0) on <2 x i16>. The sm_90 / ptx80 run
; expects one packed min.relu.s16x2; the sm_20 run expects the operation to be
; done per element with min.s16 / max.s16.
define <2 x i16> @min_relu_s16x2(<2 x i16> %a, <2 x i16> %b) {
; SM90-LABEL: min_relu_s16x2(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [min_relu_s16x2_param_0];
; SM90-NEXT: ld.param.b32 %r2, [min_relu_s16x2_param_1];
; SM90-NEXT: min.relu.s16x2 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: min_relu_s16x2(
; SM20: {
; SM20-NEXT: .reg .b16 %rs<9>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [min_relu_s16x2_param_0];
; SM20-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [min_relu_s16x2_param_1];
; SM20-NEXT: min.s16 %rs5, %rs1, %rs3;
; SM20-NEXT: min.s16 %rs6, %rs2, %rs4;
; SM20-NEXT: max.s16 %rs7, %rs6, 0;
; SM20-NEXT: max.s16 %rs8, %rs5, 0;
; SM20-NEXT: st.param.v2.b16 [func_retval0], {%rs8, %rs7};
; SM20-NEXT: ret;
  %lo = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
  %relu = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %lo, <2 x i16> zeroinitializer)
  ret <2 x i16> %relu
}

; Vector form of smax(smax(a, b), 0) on <2 x i16>. The sm_90 / ptx80 run
; expects one packed max.relu.s16x2; the sm_20 run expects four scalar max.s16
; (two element-wise, two against 0).
define <2 x i16> @max_relu_s16x2(<2 x i16> %a, <2 x i16> %b) {
; SM90-LABEL: max_relu_s16x2(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [max_relu_s16x2_param_0];
; SM90-NEXT: ld.param.b32 %r2, [max_relu_s16x2_param_1];
; SM90-NEXT: max.relu.s16x2 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: max_relu_s16x2(
; SM20: {
; SM20-NEXT: .reg .b16 %rs<9>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [max_relu_s16x2_param_0];
; SM20-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [max_relu_s16x2_param_1];
; SM20-NEXT: max.s16 %rs5, %rs1, %rs3;
; SM20-NEXT: max.s16 %rs6, %rs2, %rs4;
; SM20-NEXT: max.s16 %rs7, %rs6, 0;
; SM20-NEXT: max.s16 %rs8, %rs5, 0;
; SM20-NEXT: st.param.v2.b16 [func_retval0], {%rs8, %rs7};
; SM20-NEXT: ret;
  %hi = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
  %relu = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %hi, <2 x i16> zeroinitializer)
  ret <2 x i16> %relu
}

; Vector form of smax(smax(a, 0), b) on <2 x i16>: %a is clamped against zero
; first and then maxed with %b. The expected PTX has the same shape as for the
; other association order: one max.relu.s16x2 on the sm_90 / ptx80 run, four
; scalar max.s16 on the sm_20 run.
define <2 x i16> @max_relu_s16x2_v2(<2 x i16> %a, <2 x i16> %b) {
; SM90-LABEL: max_relu_s16x2_v2(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [max_relu_s16x2_v2_param_0];
; SM90-NEXT: ld.param.b32 %r2, [max_relu_s16x2_v2_param_1];
; SM90-NEXT: max.relu.s16x2 %r3, %r1, %r2;
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
; SM90-NEXT: ret;
;
; SM20-LABEL: max_relu_s16x2_v2(
; SM20: {
; SM20-NEXT: .reg .b16 %rs<9>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [max_relu_s16x2_v2_param_0];
; SM20-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [max_relu_s16x2_v2_param_1];
; SM20-NEXT: max.s16 %rs5, %rs1, %rs3;
; SM20-NEXT: max.s16 %rs6, %rs2, %rs4;
; SM20-NEXT: max.s16 %rs7, %rs6, 0;
; SM20-NEXT: max.s16 %rs8, %rs5, 0;
; SM20-NEXT: st.param.v2.b16 [func_retval0], {%rs8, %rs7};
; SM20-NEXT: ret;
  %nonneg = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> zeroinitializer)
  %relu = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %nonneg, <2 x i16> %b)
  ret <2 x i16> %relu
}