Alex MacLean d27802a217
[DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic (#150270)
This change adds support for folding a SETCC when one or both of the
operands is a TRUNCATE with the appropriate no-wrap flags. This pattern
can occur when promoting i8 operations in NVPTX, and we currently have
some ISel rules to try to handle it.
2025-08-05 19:20:17 -07:00

270 lines
8.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
target triple = "nvptx64-nvidia-cuda"
; Signed compare (sgt) of an nsw-truncated i32 against the constant -1.
; The trunc flag (nsw) matches the signedness of the predicate, and the
; expected PTX performs both the add and the compare on 32-bit registers
; (add.s32 / setp.gt.s32) with no narrowing or sign-extension step.
; NOTE(review): "singed" in the function name looks like a typo for "signed";
; it is left as-is because the name is repeated in the expected output.
define i1 @trunc_nsw_singed_const(i32 %a) {
; CHECK-LABEL: trunc_nsw_singed_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_const_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: setp.gt.s32 %p1, %r2, -1;
; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nsw i32 %a2 to i8
%c = icmp sgt i8 %b, -1
ret i1 %c
}
; Signed compare (slt) of an nuw-truncated i32 against the constant 100.
; Here the trunc flag (nuw) does not match the signed predicate. The expected
; PTX stays narrow: it loads a single byte of the parameter (ld.param.b8),
; adds in 16 bits, sign-extends the low 8 bits (cvt.s16.s8) and compares with
; setp.lt.s16 instead of comparing the 32-bit value directly.
define i1 @trunc_nuw_singed_const(i32 %a) {
; CHECK-LABEL: trunc_nuw_singed_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_const_param_0];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.s16.s8 %rs3, %rs2;
; CHECK-NEXT: setp.lt.s16 %p1, %rs3, 100;
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nuw i32 %a2 to i8
%c = icmp slt i8 %b, 100
ret i1 %c
}
; Unsigned compare (ult) of an nsw-truncated i32 against the i8 constant -20.
; The trunc flag (nsw) does not match the unsigned predicate. The expected PTX
; stays narrow: single-byte parameter load, 16-bit add, mask to the low 8 bits
; (and.b16 with 255), then setp.lt.u16 against 236, which is the i8 value -20
; read as an unsigned 8-bit number.
define i1 @trunc_nsw_unsinged_const(i32 %a) {
; CHECK-LABEL: trunc_nsw_unsinged_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_const_param_0];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: and.b16 %rs3, %rs2, 255;
; CHECK-NEXT: setp.lt.u16 %p1, %rs3, 236;
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nsw i32 %a2 to i8
%c = icmp ult i8 %b, -20
ret i1 %c
}
; Unsigned compare (ugt) of an nuw-truncated i32 against the constant 100.
; The trunc flag (nuw) matches the unsigned predicate, and the expected PTX
; performs the add and the compare on 32-bit registers (add.s32 /
; setp.gt.u32) with no masking of the value to 8 bits.
define i1 @trunc_nuw_unsinged_const(i32 %a) {
; CHECK-LABEL: trunc_nuw_unsinged_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_const_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: setp.gt.u32 %p1, %r2, 100;
; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nuw i32 %a2 to i8
%c = icmp ugt i8 %b, 100
ret i1 %c
}
; Equality compare of an nsw-truncated i32 against the constant 100.
; The expected PTX contains no add and no narrowing: the 32-bit parameter is
; compared directly with setp.eq.b32 against 99, i.e. the "+ 1" from the IR
; has been absorbed into the compared constant (100 - 1).
define i1 @trunc_nsw_eq_const(i32 %a) {
; CHECK-LABEL: trunc_nsw_eq_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_const_param_0];
; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nsw i32 %a2 to i8
%c = icmp eq i8 %b, 100
ret i1 %c
}
; Equality compare of an nuw-truncated i32 against the constant 100.
; Same input shape as the nsw equality test but with the nuw flag on the
; trunc. The expected PTX is the same shape too: no add and no narrowing, just
; setp.eq.b32 of the 32-bit parameter against 99 (the "+ 1" folded into the
; constant).
define i1 @trunc_nuw_eq_const(i32 %a) {
; CHECK-LABEL: trunc_nuw_eq_const(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_const_param_0];
; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%a2 = add i32 %a, 1
%b = trunc nuw i32 %a2 to i8
%c = icmp eq i8 %b, 100
ret i1 %c
}
;;;
; Two-operand variant: signed compare (sgt) where both sides are nsw-truncated
; i32 values (%a1 + 1 and %a2 + 7).
; The trunc flags match the signed predicate, and the expected PTX keeps
; everything in 32-bit registers: two add.s32 followed by setp.gt.s32, with no
; conversion to a narrower type.
define i1 @trunc_nsw_singed(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nsw_singed(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_singed_param_1];
; CHECK-NEXT: add.s32 %r4, %r3, 7;
; CHECK-NEXT: setp.gt.s32 %p1, %r2, %r4;
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 1
%b2 = add i32 %a2, 7
%c1 = trunc nsw i32 %b1 to i8
%c2 = trunc nsw i32 %b2 to i8
%c = icmp sgt i8 %c1, %c2
ret i1 %c
}
; Two-operand variant: signed compare (slt) where both sides are nuw-truncated
; i32 values (%a1 + 1 and %a2 + 6).
; The trunc flags (nuw) do not match the signed predicate. The expected PTX
; stays narrow: each parameter is loaded as a single byte, added in 16 bits and
; sign-extended from 8 bits (cvt.s16.s8) before a setp.lt.s16 compare.
define i1 @trunc_nuw_singed(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nuw_singed(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<7>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_param_0];
; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nuw_singed_param_1];
; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
; CHECK-NEXT: cvt.s16.s8 %rs4, %rs3;
; CHECK-NEXT: add.s16 %rs5, %rs2, 6;
; CHECK-NEXT: cvt.s16.s8 %rs6, %rs5;
; CHECK-NEXT: setp.lt.s16 %p1, %rs4, %rs6;
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 1
%b2 = add i32 %a2, 6
%c1 = trunc nuw i32 %b1 to i8
%c2 = trunc nuw i32 %b2 to i8
%c = icmp slt i8 %c1, %c2
ret i1 %c
}
; Two-operand variant: unsigned compare (ult) where both sides are
; nsw-truncated i32 values (%a1 + 1 and %a2 + 4).
; The trunc flags (nsw) do not match the unsigned predicate. The expected PTX
; stays narrow: each parameter is loaded as a single byte, added in 16 bits and
; masked to its low 8 bits (and.b16 with 255) before a setp.lt.u16 compare.
define i1 @trunc_nsw_unsinged(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nsw_unsinged(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<7>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_param_0];
; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nsw_unsinged_param_1];
; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
; CHECK-NEXT: and.b16 %rs4, %rs3, 255;
; CHECK-NEXT: add.s16 %rs5, %rs2, 4;
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
; CHECK-NEXT: setp.lt.u16 %p1, %rs4, %rs6;
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 1
%b2 = add i32 %a2, 4
%c1 = trunc nsw i32 %b1 to i8
%c2 = trunc nsw i32 %b2 to i8
%c = icmp ult i8 %c1, %c2
ret i1 %c
}
; Two-operand variant: unsigned compare (ugt) where both sides are
; nuw-truncated i32 values (%a1 + 1 and %a2 + 5).
; The trunc flags match the unsigned predicate, and the expected PTX keeps
; everything in 32-bit registers: two add.s32 followed by setp.gt.u32, with no
; masking to 8 bits.
define i1 @trunc_nuw_unsinged(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nuw_unsinged(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_unsinged_param_1];
; CHECK-NEXT: add.s32 %r4, %r3, 5;
; CHECK-NEXT: setp.gt.u32 %p1, %r2, %r4;
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 1
%b2 = add i32 %a2, 5
%c1 = trunc nuw i32 %b1 to i8
%c2 = trunc nuw i32 %b2 to i8
%c = icmp ugt i8 %c1, %c2
ret i1 %c
}
; Two-operand variant: equality compare where both sides are nsw-truncated
; i32 values (%a1 + 1 and %a2 + 3).
; The expected PTX keeps everything in 32-bit registers: two add.s32 followed
; by setp.eq.b32, with no conversion to a narrower type.
define i1 @trunc_nsw_eq(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nsw_eq(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_eq_param_1];
; CHECK-NEXT: add.s32 %r4, %r3, 3;
; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 1
%b2 = add i32 %a2, 3
%c1 = trunc nsw i32 %b1 to i8
%c2 = trunc nsw i32 %b2 to i8
%c = icmp eq i8 %c1, %c2
ret i1 %c
}
; Two-operand variant: equality compare where both sides are nuw-truncated
; i32 values (%a1 + 2 and %a2 + 1).
; The expected PTX keeps everything in 32-bit registers: two add.s32 followed
; by setp.eq.b32, with no conversion to a narrower type.
define i1 @trunc_nuw_eq(i32 %a1, i32 %a2) {
; CHECK-LABEL: trunc_nuw_eq(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 2;
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_eq_param_1];
; CHECK-NEXT: add.s32 %r4, %r3, 1;
; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%b1 = add i32 %a1, 2
%b2 = add i32 %a2, 1
%c1 = trunc nuw i32 %b1 to i8
%c2 = trunc nuw i32 %b2 to i8
%c = icmp eq i8 %c1, %c2
ret i1 %c
}