ValueTracking/AMDGPU: handle mbcnt in computeKnownBitsFromOperator (#183229)

This helps canonicalize some address calculation. This would further
help immediate folding into memory load instructions in the backend.

The order changes to v_mad_u32_u24 is just because
@llvm.amdgcn.mul.u24.i32 was used in codegen prepare after this change.
It does not really change anything important.
This commit is contained in:
Ruiling, Song 2026-03-02 10:48:15 +08:00 committed by GitHub
parent e95dabef96
commit 686987a540
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 197 additions and 108 deletions

View File

@ -2265,6 +2265,16 @@ static void computeKnownBitsFromOperator(const Operator *I,
Known.Zero.setBitsFrom(KnownZeroFirstBit);
break;
}
case Intrinsic::amdgcn_mbcnt_hi:
case Intrinsic::amdgcn_mbcnt_lo: {
// Wave64 mbcnt_lo returns at most 32 + src1. Otherwise these return at
// most 31 + src1.
Known.Zero.setBitsFrom(
II->getIntrinsicID() == Intrinsic::amdgcn_mbcnt_lo ? 6 : 5);
computeKnownBits(I->getOperand(1), Known2, Q, Depth + 1);
Known = KnownBits::add(Known, Known2);
break;
}
case Intrinsic::vscale: {
if (!II->getParent() || !II->getFunction())
break;

View File

@ -40,7 +40,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_readfirstlane_b32 s4, v1
; GFX6-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX6-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
@ -66,7 +66,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v1
; GFX8-NEXT: v_mad_u32_u24 v2, v0, 5, s2
; GFX8-NEXT: v_mad_u32_u24 v2, 5, v0, s2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
@ -95,7 +95,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
@ -123,7 +123,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W64-NEXT: s_waitcnt vmcnt(0)
; GFX10W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W64-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W64-NEXT: s_endpgm
@ -150,7 +150,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W32-NEXT: s_waitcnt vmcnt(0)
; GFX10W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W32-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W32-NEXT: s_endpgm
@ -180,7 +180,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W64-NEXT: s_endpgm
@ -209,7 +209,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W32-NEXT: s_endpgm
@ -241,7 +241,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W64-NEXT: v_mov_b32_e32 v1, 0
; GFX12W64-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W64-NEXT: s_wait_kmcnt 0x0
; GFX12W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W64-NEXT: s_endpgm
@ -271,7 +271,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX12W32-NEXT: v_mov_b32_e32 v1, 0
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W32-NEXT: s_wait_kmcnt 0x0
; GFX12W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W32-NEXT: s_endpgm

View File

@ -59,7 +59,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
; GFX7LESS-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX7LESS-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@ -91,7 +91,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX8-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@ -123,7 +123,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@ -156,7 +156,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_readfirstlane_b32 s2, v1
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1064-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
@ -189,7 +189,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_readfirstlane_b32 s2, v1
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1032-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
@ -223,7 +223,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1164-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX1164-NEXT: s_endpgm
@ -256,7 +256,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1132-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX1132-NEXT: s_endpgm
@ -291,7 +291,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1264-NEXT: s_wait_kmcnt 0x0
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
; GFX1264-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1264-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1264-NEXT: s_mov_b32 s2, -1
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1264-NEXT: s_endpgm
@ -323,7 +323,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1232-NEXT: s_wait_kmcnt 0x0
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
; GFX1232-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1232-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1232-NEXT: s_mov_b32 s2, -1
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
; GFX1232-NEXT: s_endpgm
@ -1630,15 +1630,16 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX8-NEXT: buffer_wbinvl1_vol
; GFX8-NEXT: .LBB3_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
; GFX8-NEXT: v_readfirstlane_b32 s5, v0
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2
; GFX8-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s5, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v1
; GFX8-NEXT: v_readfirstlane_b32 s3, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1]
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@ -1672,7 +1673,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: v_readfirstlane_b32 s3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1]
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], 5, v2, v[0:1]
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 2
@ -1709,7 +1710,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_readfirstlane_b32 s3, v1
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3]
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], 5, v2, s[2:3]
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@ -1744,7 +1745,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_readfirstlane_b32 s3, v1
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3]
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, 5, v2, s[2:3]
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@ -1781,7 +1782,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@ -1816,7 +1817,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@ -1854,7 +1855,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
; GFX1264-NEXT: s_mov_b32 s2, -1
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@ -1888,7 +1889,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
; GFX1232-NEXT: s_mov_b32 s2, -1
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null

View File

@ -45,7 +45,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
; GFX7LESS-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX7LESS-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
@ -73,7 +73,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX8-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -100,7 +100,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -128,7 +128,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064-NEXT: v_readfirstlane_b32 s2, v1
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1064-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
@ -156,7 +156,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032-NEXT: v_readfirstlane_b32 s2, v1
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1032-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
@ -186,7 +186,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1164-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@ -214,7 +214,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1132-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@ -1499,16 +1499,17 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: .LBB4_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8-NEXT: v_readfirstlane_b32 s2, v1
; GFX8-NEXT: v_readfirstlane_b32 s3, v0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1]
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
; GFX8-NEXT: v_readfirstlane_b32 s5, v0
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2
; GFX8-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s5, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_nop 1
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@ -1535,7 +1536,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1]
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], 5, v2, v[0:1]
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@ -1566,7 +1567,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064-NEXT: v_readfirstlane_b32 s3, v1
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3]
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], 5, v2, s[2:3]
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@ -1595,7 +1596,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032-NEXT: v_readfirstlane_b32 s3, v1
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3]
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, 5, v2, s[2:3]
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@ -1627,7 +1628,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@ -1658,7 +1659,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, 5, v2, s[2:3]
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)

View File

@ -37,7 +37,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX7-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s4, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX7-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX7-NEXT: .LBB0_4: ; %Flow
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT: s_wqm_b64 s[4:5], -1
@ -72,7 +72,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX89-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: v_readfirstlane_b32 s4, v1
; GFX89-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX89-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX89-NEXT: .LBB0_4: ; %Flow
; GFX89-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX89-NEXT: s_wqm_b64 s[4:5], -1
@ -108,7 +108,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1064-NEXT: s_waitcnt vmcnt(0)
; GFX1064-NEXT: v_readfirstlane_b32 s4, v1
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX1064-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX1064-NEXT: .LBB0_4: ; %Flow
; GFX1064-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX1064-NEXT: s_wqm_b64 s[4:5], -1
@ -143,7 +143,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX1032-NEXT: s_waitcnt vmcnt(0)
; GFX1032-NEXT: v_readfirstlane_b32 s4, v1
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX1032-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX1032-NEXT: .LBB0_4: ; %Flow
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX1032-NEXT: s_wqm_b32 s4, -1
@ -182,7 +182,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1164-NEXT: s_waitcnt vmcnt(0)
; GFX1164-NEXT: v_readfirstlane_b32 s4, v1
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX1164-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX1164-NEXT: .LBB0_4: ; %Flow
; GFX1164-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX1164-NEXT: s_wqm_b64 s[4:5], -1
@ -221,7 +221,7 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_readfirstlane_b32 s4, v1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX1132-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX1132-NEXT: .LBB0_4: ; %Flow
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX1132-NEXT: s_wqm_b32 s4, -1

View File

@ -39,7 +39,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_readfirstlane_b32 s4, v1
; GFX6-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX6-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
@ -65,7 +65,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v1
; GFX8-NEXT: v_mad_u32_u24 v2, v0, 5, s2
; GFX8-NEXT: v_mad_u32_u24 v2, 5, v0, s2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
@ -94,7 +94,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
@ -122,7 +122,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W64-NEXT: s_waitcnt vmcnt(0)
; GFX10W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W64-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W64-NEXT: s_endpgm
@ -149,7 +149,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W32-NEXT: s_waitcnt vmcnt(0)
; GFX10W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W32-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W32-NEXT: s_endpgm
@ -179,7 +179,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W64-NEXT: s_endpgm
@ -208,7 +208,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W32-NEXT: s_endpgm
@ -240,7 +240,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W64-NEXT: v_mov_b32_e32 v1, 0
; GFX12W64-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W64-NEXT: s_wait_kmcnt 0x0
; GFX12W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W64-NEXT: s_endpgm
@ -270,7 +270,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX12W32-NEXT: v_mov_b32_e32 v1, 0
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W32-NEXT: s_wait_kmcnt 0x0
; GFX12W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W32-NEXT: s_endpgm

View File

@ -40,7 +40,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_readfirstlane_b32 s4, v1
; GFX6-NEXT: v_mad_u32_u24 v0, v0, 5, s4
; GFX6-NEXT: v_mad_u32_u24 v0, 5, v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
@ -67,7 +67,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v1
; GFX8-NEXT: v_mad_u32_u24 v2, v0, 5, s2
; GFX8-NEXT: v_mad_u32_u24 v2, 5, v0, s2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
@ -97,7 +97,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
@ -126,7 +126,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W64-NEXT: s_waitcnt vmcnt(0)
; GFX10W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W64-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W64-NEXT: s_endpgm
@ -154,7 +154,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10W32-NEXT: s_waitcnt vmcnt(0)
; GFX10W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX10W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10W32-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10W32-NEXT: s_endpgm
@ -185,7 +185,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W64-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W64-NEXT: s_endpgm
@ -215,7 +215,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX11W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11W32-NEXT: s_endpgm
@ -248,7 +248,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W64-NEXT: v_mov_b32_e32 v1, 0
; GFX12W64-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W64-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W64-NEXT: s_wait_kmcnt 0x0
; GFX12W64-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W64-NEXT: s_endpgm
@ -278,7 +278,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12W32-NEXT: v_readfirstlane_b32 s2, v1
; GFX12W32-NEXT: v_mov_b32_e32 v1, 0
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12W32-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12W32-NEXT: s_wait_kmcnt 0x0
; GFX12W32-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12W32-NEXT: s_endpgm

View File

@ -905,7 +905,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -934,7 +934,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_readfirstlane_b32 s2, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX90A-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX90A-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
@ -964,7 +964,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s2, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
@ -993,7 +993,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0
; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-FLATSCR-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX9-FLATSCR-NEXT: s_endpgm
@ -1024,7 +1024,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: v_readfirstlane_b32 s2, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
@ -1058,7 +1058,7 @@ define amdgpu_kernel void @atomic_add_ret_local(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
@ -1096,7 +1096,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -1124,7 +1124,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_readfirstlane_b32 s2, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX90A-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX90A-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
@ -1152,7 +1152,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s2, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX10-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
@ -1180,7 +1180,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0
; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-FLATSCR-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX9-FLATSCR-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX9-FLATSCR-NEXT: s_endpgm
@ -1210,7 +1210,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX11-NEXT: v_readfirstlane_b32 s2, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
@ -1241,7 +1241,7 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
; GFX12-NEXT: v_readfirstlane_b32 s2, v1
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX12-NEXT: v_mad_u32_u24 v0, 5, v0, s2
; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm

View File

@ -11,8 +11,8 @@ define amdgpu_ps <2 x float> @global_load_scale_add_foldable_knownbits(ptr addrs
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80
; GFX12-NEXT: global_load_b64 v[0:1], v0, s[2:3]
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX12-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
;
@ -20,9 +20,7 @@ define amdgpu_ps <2 x float> @global_load_scale_add_foldable_knownbits(ptr addrs
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80
; GFX1250-NEXT: global_load_b64 v[0:1], v0, s[2:3]
; GFX1250-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128 scale_offset
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
%v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)

View File

@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE64 %s
; As the addition of 32 does not overflow, it can be canonicalized as gep.
define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP2]], i64 128
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP]], align 8
; CHECK-NEXT: ret <2 x float> [[LOAD]]
;
%v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%mul = shl i32 %v, 1
%add = add i32 %mul, 32
%zext.offset = zext i32 %add to i64
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = load <2 x float>, ptr addrspace(1) %gep
ret <2 x float> %load
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; WAVE32: {{.*}}
; WAVE64: {{.*}}

View File

@ -50,7 +50,7 @@ define i32 @test_wave_shuffle_dpp_row_share_0(i32 %val) {
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65520
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 112
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
;
@ -77,7 +77,7 @@ define i32 @test_wave_shuffle_dpp_row_share_0_no_or(i32 %val) {
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65520
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 112
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
;
@ -159,7 +159,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W64-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65520
; CHECK-W64-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 48
; CHECK-W64-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
; CHECK-W64-NEXT: ret i32 [[RES]]
@ -167,7 +167,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65520
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 48
; CHECK-NO-WAVE-SIZE-NEXT: [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
@ -217,28 +217,17 @@ define i32 @test_wave_shuffle_dpp_row_share_w32_mask(i32 %val) {
define i32 @test_wave_shuffle_not_quite_row_share(i32 %val) {
; CHECK-W32-LABEL: define i32 @test_wave_shuffle_not_quite_row_share(
; CHECK-W32-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-W32-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W32-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65280
; CHECK-W32-NEXT: [[OR_RES:%.*]] = or disjoint i32 [[MASKED]], 55
; CHECK-W32-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[OR_RES]])
; CHECK-W32-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 55)
; CHECK-W32-NEXT: ret i32 [[RES]]
;
; CHECK-W64-LABEL: define i32 @test_wave_shuffle_not_quite_row_share(
; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-W64-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-W64-NEXT: [[TID1:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TID]])
; CHECK-W64-NEXT: [[MASKED:%.*]] = and i32 [[TID1]], 65280
; CHECK-W64-NEXT: [[OR_RES:%.*]] = or disjoint i32 [[MASKED]], 55
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[OR_RES]])
; CHECK-W64-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 55)
; CHECK-W64-NEXT: ret i32 [[RES]]
;
; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_not_quite_row_share(
; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NO-WAVE-SIZE-NEXT: [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NO-WAVE-SIZE-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; CHECK-NO-WAVE-SIZE-NEXT: [[MASKED:%.*]] = and i32 [[TID]], 65280
; CHECK-NO-WAVE-SIZE-NEXT: [[OR_RES:%.*]] = or disjoint i32 [[MASKED]], 55
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[OR_RES]])
; CHECK-NO-WAVE-SIZE-NEXT: [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 55)
; CHECK-NO-WAVE-SIZE-NEXT: ret i32 [[RES]]
;
%lo = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)

View File

@ -92,5 +92,66 @@ define i32 @ockl_lane_u32() {
ret i32 %hi
}
define i32 @mbcnt_lo_and63() {
; DEFAULT-LABEL: define i32 @mbcnt_lo_and63() {
; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; DEFAULT-NEXT: ret i32 [[LO]]
;
; WAVE32-LABEL: define i32 @mbcnt_lo_and63
; WAVE32-SAME: () #[[ATTR1]] {
; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE32-NEXT: ret i32 [[LO]]
;
; WAVE64-LABEL: define i32 @mbcnt_lo_and63
; WAVE64-SAME: () #[[ATTR1]] {
; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE64-NEXT: ret i32 [[LO]]
;
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%and = and i32 %lo, 63
ret i32 %lo
}
define i32 @mbcnt_hi_and31() {
; DEFAULT-LABEL: define i32 @mbcnt_hi_and31() {
; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
; DEFAULT-NEXT: ret i32 [[HI]]
;
; WAVE32-LABEL: define i32 @mbcnt_hi_and31
; WAVE32-SAME: () #[[ATTR1]] {
; WAVE32-NEXT: ret i32 0
;
; WAVE64-LABEL: define i32 @mbcnt_hi_and31
; WAVE64-SAME: () #[[ATTR1]] {
; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
; WAVE64-NEXT: ret i32 [[HI]]
;
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
%and = and i32 %hi, 31
ret i32 %and
}
define i32 @ockl_lane_u32_and127() {
; DEFAULT-LABEL: define i32 @ockl_lane_u32_and127() {
; DEFAULT-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; DEFAULT-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; DEFAULT-NEXT: ret i32 [[HI]]
;
; WAVE32-LABEL: define i32 @ockl_lane_u32_and127
; WAVE32-SAME: () #[[ATTR1]] {
; WAVE32-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE32-NEXT: ret i32 [[LO]]
;
; WAVE64-LABEL: define i32 @ockl_lane_u32_and127
; WAVE64-SAME: () #[[ATTR1]] {
; WAVE64-NEXT: [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; WAVE64-NEXT: [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
; WAVE64-NEXT: ret i32 [[HI]]
;
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
%and = and i32 %hi, 127
ret i32 %and
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}