; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify %}

; Every function below combines two shifts (or equivalent multiplies/divides)
; with `add`. The expected PTX records which shapes come out as
; shf.{l,r}.wrap.b32 and which keep their separate shift instructions.

target triple = "nvptx64-nvidia-cuda"

; Constant amounts 7 and 25 (7 + 25 == 32): (%x << 7) + (%x >> 25).
; Expected: one shf.l.wrap.b32 of %x with itself by 7.
define i32 @test_simple_rotl(i32 %x) {
; CHECK-LABEL: test_simple_rotl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotl_param_0];
; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 7;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %shl = shl i32 %x, 7
  %shr = lshr i32 %x, 25
  %add = add i32 %shl, %shr
  ret i32 %add
}

; Mirror of the case above: (%x >> 7) + (%x << 25).
; Expected: still a left funnel shift, with amount 25 instead of a right
; shift by 7.
define i32 @test_simple_rotr(i32 %x) {
; CHECK-LABEL: test_simple_rotr(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_simple_rotr_param_0];
; CHECK-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 25;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %shr = lshr i32 %x, 7
  %shl = shl i32 %x, 25
  %add = add i32 %shr, %shl
  ret i32 %add
}

; Variable amount: (%x << %y) + (%x >> (32 - %y)).
; Expected: one shf.l.wrap.b32 whose amount is the %y register.
define i32 @test_rotl_var(i32 %x, i32 %y) {
; CHECK-LABEL: test_rotl_var(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_var_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_rotl_var_param_1];
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %shl = shl i32 %x, %y
  %sub = sub i32 32, %y
  %shr = lshr i32 %x, %sub
  %add = add i32 %shl, %shr
  ret i32 %add
}

; Variable amount, right direction: (%x >> %y) + (%x << (32 - %y)).
; Expected: one shf.r.wrap.b32 whose amount is the %y register.
define i32 @test_rotr_var(i32 %x, i32 %y) {
; CHECK-LABEL: test_rotr_var(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotr_var_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_rotr_var_param_1];
; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %shr = lshr i32 %x, %y
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %sub
  %add = add i32 %shr, %shl
  ret i32 %add
}

; Negative case: the second amount is (0 - %y) & 31 rather than 32 - %y.
; For %y == 0 both amounts are 0, so the add produces %x + %x instead of %x.
; Expected: no funnel shift; shl, neg, and, shr and add stay separate.
; Note the value names are swapped relative to their opcodes here
; (%shr is produced by shl, %shl by lshr).
define i32 @test_invalid_rotl_var_and(i32 %x, i32 %y) {
; CHECK-LABEL: test_invalid_rotl_var_and(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotl_var_and_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotl_var_and_param_1];
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
; CHECK-NEXT: neg.s32 %r4, %r2;
; CHECK-NEXT: and.b32 %r5, %r4, 31;
; CHECK-NEXT: shr.u32 %r6, %r1, %r5;
; CHECK-NEXT: add.s32 %r7, %r6, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %shr = shl i32 %x, %y
  %sub = sub nsw i32 0, %y
  %and = and i32 %sub, 31
  %shl = lshr i32 %x, %and
  %add = add i32 %shl, %shr
  ret i32 %add
}

; Negative case, right direction: (%x >> %y) + (%x << ((0 - %y) & 31)).
; Expected: no funnel shift; shr, neg, and, shl and add stay separate.
define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) {
; CHECK-LABEL: test_invalid_rotr_var_and(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_invalid_rotr_var_and_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_invalid_rotr_var_and_param_1];
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
; CHECK-NEXT: neg.s32 %r4, %r2;
; CHECK-NEXT: and.b32 %r5, %r4, 31;
; CHECK-NEXT: shl.b32 %r6, %r1, %r5;
; CHECK-NEXT: add.s32 %r7, %r3, %r6;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  %shr = lshr i32 %x, %y
  %sub = sub nsw i32 0, %y
  %and = and i32 %sub, 31
  %shl = shl i32 %x, %and
  %add = add i32 %shr, %shl
  ret i32 %add
}

; Two different inputs: (%x0 << %y) + ((%x1 >> 1) >> (%y ^ 31)).
; Expected: one shf.l.wrap.b32 with source operands in the order %x1, %x0, %y.
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
; CHECK-LABEL: test_fshl_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_special_case_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_fshl_special_case_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [test_fshl_special_case_param_2];
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %shl = shl i32 %x0, %y
  %srli = lshr i32 %x1, 1
  %x = xor i32 %y, 31
  %srlo = lshr i32 %srli, %x
  %o = add i32 %shl, %srlo
  ret i32 %o
}

; Right-direction counterpart: (%x1 >> %y) + ((%x0 << 1) << (%y ^ 31)).
; Expected: one shf.r.wrap.b32 with source operands in the order %x1, %x0, %y.
; The names %shl/%srli/%srlo are carried over from the left-shift variant and
; do not match the opcodes that define them.
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
; CHECK-LABEL: test_fshr_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshr_special_case_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_fshr_special_case_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [test_fshr_special_case_param_2];
; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %shl = lshr i32 %x1, %y
  %srli = shl i32 %x0, 1
  %x = xor i32 %y, 31
  %srlo = shl i32 %srli, %x
  %o = add i32 %shl, %srlo
  ret i32 %o
}

; 64-bit case hidden behind divides: ((%i / 3) << 60) + (%i / 48), where
; 48 == 3 * 16 and 60 + 4 == 64.
; Expected: a single divide-by-3 sequence (mul.hi.u64 + shr.u64), then the
; value is split into 32-bit halves and combined with two shf.l.wrap.b32 by 28.
define i64 @test_rotl_udiv_special_case(i64 %i) {
; CHECK-LABEL: test_rotl_udiv_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_udiv_special_case_param_0];
; CHECK-NEXT: mul.hi.u64 %rd2, %rd1, -6148914691236517205;
; CHECK-NEXT: shr.u64 %rd3, %rd2, 1;
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd3;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r1, 28;
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, 28;
; CHECK-NEXT: mov.b64 %rd4, {%r4, %r3};
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %lhs_div = udiv i64 %i, 3
  %rhs_div = udiv i64 %i, 48
  %lhs_shift = shl i64 %lhs_div, 60
  %out = add i64 %lhs_shift, %rhs_div
  ret i64 %out
}

; 32-bit case hidden behind multiplies: ((%i * 9) >> 25) + (%i * 1152), where
; 1152 == 9 << 7 and 25 + 7 == 32.
; Expected: one multiply by 9 followed by one shf.l.wrap.b32 by 7.
define i32 @test_rotl_mul_special_case(i32 %i) {
; CHECK-LABEL: test_rotl_mul_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_rotl_mul_special_case_param_0];
; CHECK-NEXT: mul.lo.s32 %r2, %r1, 9;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r2, %r2, 7;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %lhs_mul = mul i32 %i, 9
  %rhs_mul = mul i32 %i, 1152
  %lhs_shift = lshr i32 %lhs_mul, 25
  %out = add i32 %lhs_shift, %rhs_mul
  ret i32 %out
}

; 64-bit multiply variant with a mask on the shifted-left side:
; ((%i * 1152) & 160) + ((%i * 9) >> 57).
; Expected: one multiply by 9, two shf.l.wrap.b32 by 7 on the 32-bit halves,
; and a single and.b64 with 255 applied to the recombined value.
define i64 @test_rotl_mul_with_mask_special_case(i64 %i) {
; CHECK-LABEL: test_rotl_mul_with_mask_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_rotl_mul_with_mask_special_case_param_0];
; CHECK-NEXT: mul.lo.s64 %rd2, %rd1, 9;
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1;
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
; CHECK-NEXT: shf.l.wrap.b32 %r5, %r4, %r1, 7;
; CHECK-NEXT: shf.l.wrap.b32 %r6, %r1, %r2, 7;
; CHECK-NEXT: mov.b64 %rd3, {%r5, %r6};
; CHECK-NEXT: and.b64 %rd4, %rd3, 255;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %lhs_mul = mul i64 %i, 1152
  %rhs_mul = mul i64 %i, 9
  %lhs_and = and i64 %lhs_mul, 160
  %rhs_shift = lshr i64 %rhs_mul, 57
  %out = add i64 %lhs_and, %rhs_shift
  ret i64 %out
}

; Mask on the shifted-right side: ((%x | 1) << 5) + ((%x >> 27) & 1).
; Expected: the or, one shf.l.wrap.b32 by 5 taking %x and (%x | 1), and a
; trailing and.b32 with -31.
define i32 @test_fshl_with_mask_special_case(i32 %x) {
; CHECK-LABEL: test_fshl_with_mask_special_case(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_fshl_with_mask_special_case_param_0];
; CHECK-NEXT: or.b32 %r2, %r1, 1;
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r2, 5;
; CHECK-NEXT: and.b32 %r4, %r3, -31;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
  %or1 = or i32 %x, 1
  %sh1 = shl i32 %or1, 5
  %sh2 = lshr i32 %x, 27
  %1 = and i32 %sh2, 1
  %r = add i32 %sh1, %1
  ret i32 %r
}