llvm-project/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
Matt Arsenault bbde792786
AMDGPU: Relax shouldCoalesce to allow more register tuple widening (#166475)
Allow widening up to 128-bit registers or if the new register class
is at least as large as one of the existing register classes.

This was artificially limiting. In particular this was doing the wrong
thing with sequences involving copies between VGPRs and AV registers.
Nearly all test changes are improvements.

The coalescer does not just widen registers out of nowhere. If it's
trying
to "widen" a register, it's generally packing a register into an
existing
register tuple, or in a situation where the constraints imply the wider
class anyway. 067a11015 addressed the allocation failure concern by
rejecting coalescing if there are no available registers. The original
change in a4e63ead4b didn't include a realistic testcase to judge if
this is harmful for pressure. I would expect any issues from this to
be of garden variety subreg handling issue. We could use more dynamic
state information here if it really is an issue.

I get the best results by removing this override completely. This is
a smaller step for patch splitting purposes.
2025-11-11 13:50:57 -08:00

1352 lines
56 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -o - %s | FileCheck -check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -o - %s | FileCheck -check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - %s | FileCheck -check-prefixes=GFX1030 %s
; Sample test to check how we lower 64-bit division and remainder (modulo) by constant divisors.
; srem64_3 returns the signed 64-bit remainder of %i divided by the constant 3.
; The assertions below expect a division-free expansion: 32-bit multiplies by
; the 0x55555556 / 0x55555555 literals, a correction that adds bit 31 of the
; high result word, a multiply of that value by 3, and a final 64-bit subtract
; from the input.
define noundef i64 @srem64_3(i64 noundef %i) {
; GFX9-LABEL: srem64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v6, v4, s6
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v5, v4, s7
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v6, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v5, v2, v5, v6
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555555, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555556, v4, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555556, v4
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v3, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 3
ret i64 %rem
}
; srem64_6: despite the _6 suffix, the IR body below takes the signed remainder
; of %i by 3, so the expected code is the same sequence as for srem64_3 and no
; signed remainder by 6 is exercised by this function.
; TODO(review): presumably the divisor was meant to be 6 (urem64_6 and udiv64_6
; do use 6). Changing it requires regenerating the assertions with
; utils/update_llc_test_checks.py rather than editing them by hand.
define noundef i64 @srem64_6(i64 noundef %i) {
; GFX9-LABEL: srem64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v6, v4, s6
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v5, v4, s7
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v6, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v5, v2, v5, v6
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555555, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555556, v4, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555556, v4
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v3, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 3
ret i64 %rem
}
; urem64_3 returns the unsigned 64-bit remainder of %i divided by the constant 3.
; The assertions below expect a division-free expansion: 32-bit multiplies by
; the 0xaaaaaaab / 0xaaaaaaaa literals, a right shift of the 64-bit result by 1
; (v_alignbit_b32 plus v_lshrrev_b32), a multiply of that value by 3, and a
; final 64-bit subtract from the input.
define noundef i64 @urem64_3(i64 noundef %i) {
; GFX9-LABEL: urem64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, 1
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 1, v4
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v4, v3, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 1, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 3
ret i64 %rem
}
; urem64_6 returns the unsigned 64-bit remainder of %i divided by the constant 6.
; The assertions below expect the same multiply sequence as urem64_3 (literals
; 0xaaaaaaab / 0xaaaaaaaa), but with a right shift by 2 instead of 1 and a
; multiply by 6 before the final 64-bit subtract from the input.
define noundef i64 @urem64_6(i64 noundef %i) {
; GFX9-LABEL: urem64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, 2
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 6, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 2, v4
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 6, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 2, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 6, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v4, v3, 2
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 6, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 6, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 6
ret i64 %rem
}
; sdiv64_3 returns the signed 64-bit quotient of %i divided by the constant 3.
; The assertions below expect a division-free expansion: 32-bit multiplies by
; the 0x55555556 / 0x55555555 literals, followed by a 64-bit add of bit 31 of
; the high result word (v_lshrrev_b32 by 31) to form the returned value.
define noundef i64 @sdiv64_3(i64 noundef %i) {
; GFX9-LABEL: sdiv64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s7
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s6
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v4, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v1, v2, v1, v4
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[2:3]
; GFX1030-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555555, v0
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555556, v0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0x55555556, v0, v[2:3]
; GFX1030-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 3
ret i64 %div
}
; sdiv64_6: despite the _6 suffix, the IR body below computes the signed
; quotient of %i by 3, so the expected code is the same sequence as for
; sdiv64_3 and no signed division by 6 is exercised by this function.
; TODO(review): presumably the divisor was meant to be 6 (urem64_6 and udiv64_6
; do use 6). Changing it requires regenerating the assertions with
; utils/update_llc_test_checks.py rather than editing them by hand.
define noundef i64 @sdiv64_6(i64 noundef %i) {
; GFX9-LABEL: sdiv64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s7
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s6
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v4, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v1, v2, v1, v4
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[2:3]
; GFX1030-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555555, v0
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555556, v0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0x55555556, v0, v[2:3]
; GFX1030-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 3
ret i64 %div
}
; udiv64_3 returns the unsigned 64-bit quotient of %i divided by the constant 3.
; The assertions below expect a division-free expansion: 32-bit multiplies by
; the 0xaaaaaaab / 0xaaaaaaaa literals, then a right shift of the 64-bit result
; by 1 (v_alignbit_b32 for the low word, v_lshrrev_b32 for the high word).
define noundef i64 @udiv64_3(i64 noundef %i) {
; GFX9-LABEL: udiv64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 3
ret i64 %div
}
; udiv64_6 returns the unsigned 64-bit quotient of %i divided by the constant 6.
; The assertions below expect the same multiply sequence as udiv64_3 (literals
; 0xaaaaaaab / 0xaaaaaaaa), with the final 64-bit right shift being by 2
; instead of 1.
define noundef i64 @udiv64_6(i64 noundef %i) {
; GFX9-LABEL: udiv64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 6
ret i64 %div
}
; srem64_2 returns the signed 64-bit remainder of %i divided by the constant 2.
; The assertions below expect no multiply or divide: bit 31 of the high input
; word is added to the 64-bit input, the low word of that sum is masked with
; -2, and the result is subtracted from the input.
define noundef i64 @srem64_2(i64 noundef %i) {
; GFX9-LABEL: srem64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, -2, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, -2, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, -2, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 2
ret i64 %rem
}
; sdiv64_2 returns the signed 64-bit quotient of %i divided by the constant 2.
; The assertions below expect bit 31 of the high input word to be added to the
; 64-bit input, followed by a 64-bit arithmetic right shift by 1.
define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX9-LABEL: sdiv64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2
ret i64 %div
}
; urem64_2 returns the unsigned 64-bit remainder of %i divided by the constant 2.
; The assertions below expect a single AND of the low word with 1 and a zeroed
; high word, identically for all three check prefixes.
define noundef i64 @urem64_2(i64 noundef %i) {
; GFX9-LABEL: urem64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 2
ret i64 %rem
}
; udiv64_2 returns the unsigned 64-bit quotient of %i divided by the constant 2.
; The assertions below expect a 64-bit right shift by 1 built from two 32-bit
; instructions: v_alignbit_b32 for the low word and v_lshrrev_b32 for the high
; word, identically for all three check prefixes.
define noundef i64 @udiv64_2(i64 noundef %i) {
; GFX9-LABEL: udiv64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 2
ret i64 %div
}
; srem64_64 returns the signed 64-bit remainder of %i divided by the constant 64.
; The assertions below expect no multiply or divide: the sign of the high input
; word is broadcast (arithmetic shift by 31) and shifted right logically by 26,
; that value is added to the 64-bit input, the low word of the sum is masked
; with 0xffffffc0, and the result is subtracted from the input.
define noundef i64 @srem64_64(i64 noundef %i) {
; GFX9-LABEL: srem64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 64
ret i64 %rem
}
; sdiv64_64 returns the signed 64-bit quotient of %i divided by the constant 64.
; The assertions below expect the sign of the high input word to be broadcast
; (arithmetic shift by 31) and shifted right logically by 26, that value to be
; added to the 64-bit input, and then a 64-bit arithmetic right shift by 6.
define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX9-LABEL: sdiv64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 64
ret i64 %div
}
; Unsigned i64 remainder by the power-of-two constant 64. Every expected
; sequence below keeps only the low six bits of the low word (AND with 63) and
; writes zero to the high word; no multiply or divide expansion is expected.
define noundef i64 @urem64_64(i64 noundef %i) {
; GFX9-LABEL: urem64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 63, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 63, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_and_b32_e32 v0, 63, v0
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 64
ret i64 %rem
}
; Unsigned i64 division by the power-of-two constant 64. Every expected
; sequence below is a 64-bit logical shift right by 6 split across the two
; 32-bit halves: v_alignbit_b32 produces the low word from both input words and
; v_lshrrev_b32 produces the high word.
define noundef i64 @udiv64_64(i64 noundef %i) {
; GFX9-LABEL: udiv64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 64
ret i64 %div
}
; Signed i64 remainder by -2147483648, the minimum 32-bit signed value (an i64
; constant with magnitude 2^31). Every expected sequence below builds a bias
; from the sign of the high word (arithmetic shift right by 31, then logical
; shift right by 1, giving 0x7fffffff for negative inputs and 0 otherwise),
; adds it to the input, masks the low word of the sum with 0x80000000, and
; subtracts that 64-bit value from the original input.
define noundef i64 @srem64_i32min(i64 noundef %i) {
; GFX9-LABEL: srem64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, -2147483648
ret i64 %rem
}
; Signed i64 division by -2147483648, the minimum 32-bit signed value (an i64
; constant with magnitude 2^31). Every expected sequence below adds a
; sign-derived bias (0x7fffffff for negative inputs, 0 otherwise) to the input,
; arithmetically shifts the 64-bit sum right by 31, and then negates the result
; with a 64-bit subtract from zero, matching the negative divisor.
define noundef i64 @sdiv64_i32min(i64 noundef %i) {
; GFX9-LABEL: sdiv64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, 0, v0
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, -2147483648
ret i64 %div
}
; Unsigned i64 remainder by the constant written as -2147483648. Because urem
; treats its operands as unsigned, the divisor's 64-bit pattern is
; 0xFFFFFFFF80000000, which is not a power of two. The expected sequences below
; do not reduce to a single mask: each uses v_alignbit_b32 / v_lshrrev_b32 by
; 31, a v_mad_u64_u32 with multiplier 1, an add-and-shift-left by 30 masked
; with 0x80000000, and a final 64-bit add onto the original input.
define noundef i64 @urem64_i32min(i64 noundef %i) {
; GFX9-LABEL: urem64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 1, v[2:3]
; GFX9-NEXT: v_add_lshl_u32 v2, v2, v4, 30
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v4, 1, v[2:3]
; GFX942-NEXT: v_add_lshl_u32 v2, v2, v5, 30
; GFX942-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 1, v[2:3]
; GFX1030-NEXT: v_add_lshl_u32 v2, v2, v4, 30
; GFX1030-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, -2147483648
ret i64 %rem
}
; Unsigned i64 division by the constant written as -2147483648. Because udiv
; treats its operands as unsigned, the divisor's 64-bit pattern is
; 0xFFFFFFFF80000000. The expected sequences below shift the input right by 31
; (v_alignbit_b32 / v_lshrrev_b32), combine the pieces through a v_mad_u64_u32
; with multiplier 1 and a 64-bit add, shift the sum right by 1 with
; v_alignbit_b32, and finish by writing zero to the high result word.
define noundef i64 @udiv64_i32min(i64 noundef %i) {
; GFX9-LABEL: udiv64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v2, v1, v0, 31
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 1, v[0:1]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 31
; GFX942-NEXT: v_lshrrev_b32_e32 v0, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 1, v[0:1]
; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 31
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[1:2]
; GFX1030-NEXT: v_add_co_u32 v0, s4, v1, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, 0, s4
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, -2147483648
ret i64 %div
}
; Signed i64 remainder by 2147483647 (0x7fffffff, the maximum 32-bit signed
; value), which is not a power of two. The expected sequences below contain no
; divide instruction. They first form a quotient from 32-bit partial products
; (v_mul_hi_u32 / v_mad_u64_u32 with the constants 3, 0x80000001 and -1), a
; 64-bit arithmetic shift right by 30, and an added sign bit. They then
; multiply that quotient by 0x7fffffff (loaded with s_brev_b32 -2 where a
; scalar register is used, or given as a literal) and subtract the product
; from the original input.
define noundef i64 @srem64_i32max(i64 noundef %i) {
; GFX9-LABEL: srem64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, 3, v[2:3]
; GFX9-NEXT: v_lshl_add_u32 v2, v8, 31, v8
; GFX9-NEXT: v_add3_u32 v5, v5, v2, v4
; GFX9-NEXT: v_mov_b32_e32 v2, v6
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 1, v[2:3]
; GFX9-NEXT: s_brev_b32 s6, -2
; GFX9-NEXT: v_add_u32_e32 v3, v1, v3
; GFX9-NEXT: v_ashrrev_i64 v[4:5], 30, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: s_mov_b32 s2, 0x80000001
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v0, s2, v[6:7]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, -1, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v6, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[4:5], 0, v[6:7]
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, s2, v[6:7]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, 1, v[2:3]
; GFX942-NEXT: v_add_u32_e32 v3, v1, v3
; GFX942-NEXT: v_ashrrev_i64 v[6:7], 30, v[2:3]
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[4:5]
; GFX942-NEXT: s_brev_b32 s2, -2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_ashrrev_i32_e32 v8, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v8, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
; GFX1030-NEXT: v_lshl_add_u32 v8, v8, 31, v8
; GFX1030-NEXT: v_add3_u32 v7, v7, v8, v6
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v0, -1, v[6:7]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x80000001, v0, v[2:3]
; GFX1030-NEXT: v_sub_nc_u32_e32 v4, v7, v1
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_sub_nc_u32_e32 v4, v4, v0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x80000001, v1, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v6
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, v3, v4, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[2:3]
; GFX1030-NEXT: v_add_nc_u32_e32 v3, v1, v3
; GFX1030-NEXT: v_ashrrev_i64 v[4:5], 30, v[2:3]
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v5, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 2147483647
ret i64 %rem
}
; Signed i64 division by 2147483647 (0x7fffffff, the maximum 32-bit signed
; value), which is not a power of two. The expected sequences below contain no
; divide instruction: they build the result from 32-bit partial products
; (v_mul_hi_u32 / v_mad_u64_u32 with the constants 3, 0x80000001 and -1), then
; arithmetically shift the 64-bit intermediate right by 30 and add the sign bit
; of its high word (logical shift right by 31) as a rounding correction.
define noundef i64 @sdiv64_i32max(i64 noundef %i) {
; GFX9-LABEL: sdiv64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, 3, v[2:3]
; GFX9-NEXT: v_lshl_add_u32 v2, v8, 31, v8
; GFX9-NEXT: v_add3_u32 v5, v5, v2, v4
; GFX9-NEXT: v_mov_b32_e32 v2, v6
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 1, v[2:3]
; GFX9-NEXT: v_add_u32_e32 v3, v1, v3
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: s_mov_b32 s2, 0x80000001
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v0, s2, v[6:7]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, -1, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v6, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[4:5], 0, v[6:7]
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, s2, v[6:7]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, 1, v[2:3]
; GFX942-NEXT: v_add_u32_e32 v3, v1, v3
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_ashrrev_i32_e32 v8, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v8, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
; GFX1030-NEXT: v_lshl_add_u32 v8, v8, 31, v8
; GFX1030-NEXT: v_add3_u32 v7, v7, v8, v6
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v0, -1, v[6:7]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x80000001, v0, v[2:3]
; GFX1030-NEXT: v_sub_nc_u32_e32 v4, v7, v1
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_sub_nc_u32_e32 v4, v4, v0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x80000001, v1, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v6
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, v3, v4, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[2:3]
; GFX1030-NEXT: v_add_nc_u32_e32 v3, v1, v3
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2147483647
ret i64 %div
}
; Unsigned i64 remainder by 2147483647 (0x7fffffff), which is not a power of
; two. The expected sequences below contain no divide instruction. They first
; form a quotient from 32-bit partial products (v_mul_hi_u32 / v_mad_u64_u32
; with the constants 5 and 2), followed by a subtract, a 64-bit logical shift
; right by 1, an add, and a shift right by 30. They then multiply that quotient
; by 0x7fffffff (loaded with s_brev_b32 -2 where a scalar register is used, or
; given as a literal) and subtract the product from the original input.
define noundef i64 @urem64_i32max(i64 noundef %i) {
; GFX9-LABEL: urem64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_brev_b32 s6, -2
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 5, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 2, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, 2, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v3, vcc
; GFX9-NEXT: v_alignbit_b32 v2, v4, v2, 30
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 30, v4
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, 5, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, 2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, 2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX942-NEXT: s_brev_b32 s2, -2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX942-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 30
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 30, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 5, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 2, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v1, 2, v[2:3]
; GFX1030-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
; GFX1030-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, v5, v3, vcc_lo
; GFX1030-NEXT: v_alignbit_b32 v2, v4, v2, 30
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 30, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 2147483647
ret i64 %rem
}
; Unsigned i64 division by 2147483647 (0x7fffffff), which is not a power of
; two. The expected sequences below contain no divide instruction: they build
; a 64-bit intermediate from 32-bit partial products (v_mul_hi_u32 /
; v_mad_u64_u32 with the constants 5 and 2), subtract it from the input, shift
; the difference right by 1, add the intermediate back, and finish with a
; 64-bit logical shift right by 30 (v_alignbit_b32 for the low word,
; v_lshrrev_b32 for the high word).
define noundef i64 @udiv64_i32max(i64 noundef %i) {
; GFX9-LABEL: udiv64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 5, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 2, v[2:3]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v5, v3
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, 2, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, 5, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, 2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, 2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 5, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 2, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v2, s4, v5, v3
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v1, 2, v[2:3]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 2147483647
ret i64 %div
}