[DAG] Remove AssertZext if the input is masked (#146052)
Remove AssertZext if the input ensures the assert cannot fail.
This commit is contained in:
parent
d1054e801c
commit
250f2a6367
@ -15263,23 +15263,31 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
|
||||
// than X, and the And doesn't change the lower iX bits, we can move the
|
||||
// AssertZext in front of the And and drop the AssertSext.
|
||||
if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
|
||||
N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext &&
|
||||
isa<ConstantSDNode>(N0.getOperand(1))) {
|
||||
SDValue BigA = N0.getOperand(0);
|
||||
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
|
||||
const APInt &Mask = N0.getConstantOperandAPInt(1);
|
||||
if (AssertVT.bitsLT(BigA_AssertVT) &&
|
||||
Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
|
||||
SDLoc DL(N);
|
||||
SDValue NewAssert =
|
||||
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
|
||||
N0.getOperand(1));
|
||||
|
||||
// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
|
||||
// than X, and the And doesn't change the lower iX bits, we can move the
|
||||
// AssertZext in front of the And and drop the AssertSext.
|
||||
if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
|
||||
SDValue BigA = N0.getOperand(0);
|
||||
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
|
||||
if (AssertVT.bitsLT(BigA_AssertVT) &&
|
||||
Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
|
||||
SDLoc DL(N);
|
||||
SDValue NewAssert =
|
||||
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
|
||||
N0.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
// Remove AssertZext entirely if the mask guarantees the assertion cannot
|
||||
// fail.
|
||||
// TODO: Use KB countMinLeadingZeros to handle non-constant masks?
|
||||
if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
|
||||
return N0;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -442,9 +442,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
|
||||
;
|
||||
; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
|
||||
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
|
||||
; GFX11-NEXT: ds_store_b32 v0, v1 offset:456
|
||||
|
@ -714,10 +714,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
|
||||
; GFX11-LABEL: store_load_vindex_kernel:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, 15
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
@ -732,9 +732,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
|
||||
; GFX12-LABEL: store_load_vindex_kernel:
|
||||
; GFX12: ; %bb.0: ; %bb
|
||||
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
@ -769,8 +769,8 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
|
||||
; GFX942-LABEL: store_load_vindex_kernel:
|
||||
; GFX942: ; %bb.0: ; %bb
|
||||
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
|
||||
@ -809,10 +809,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
|
||||
; GFX11-PAL-LABEL: store_load_vindex_kernel:
|
||||
; GFX11-PAL: ; %bb.0: ; %bb
|
||||
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15
|
||||
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
@ -827,9 +827,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
|
||||
; GFX12-PAL-LABEL: store_load_vindex_kernel:
|
||||
; GFX12-PAL: ; %bb.0: ; %bb
|
||||
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
|
||||
@ -1958,10 +1958,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
|
||||
; GFX11-LABEL: store_load_vindex_small_offset_kernel:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
|
||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -1976,10 +1976,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
|
||||
; GFX12-LABEL: store_load_vindex_small_offset_kernel:
|
||||
; GFX12: ; %bb.0: ; %bb
|
||||
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
@ -2021,8 +2021,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
|
||||
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
|
||||
; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
|
||||
@ -2092,10 +2092,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
|
||||
; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel:
|
||||
; GFX11-PAL: ; %bb.0: ; %bb
|
||||
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off glc dlc
|
||||
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
|
||||
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -2110,10 +2110,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
|
||||
; GFX12-PAL-LABEL: store_load_vindex_small_offset_kernel:
|
||||
; GFX12-PAL: ; %bb.0: ; %bb
|
||||
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
|
||||
@ -3254,10 +3254,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
|
||||
; GFX11-LABEL: store_load_vindex_large_offset_kernel:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -3274,10 +3274,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
|
||||
; GFX12-LABEL: store_load_vindex_large_offset_kernel:
|
||||
; GFX12: ; %bb.0: ; %bb
|
||||
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
@ -3319,8 +3319,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
|
||||
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
|
||||
; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
|
||||
@ -3391,10 +3391,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
|
||||
; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel:
|
||||
; GFX11-PAL: ; %bb.0: ; %bb
|
||||
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
|
||||
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
@ -3411,10 +3411,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
|
||||
; GFX12-PAL-LABEL: store_load_vindex_large_offset_kernel:
|
||||
; GFX12-PAL: ; %bb.0: ; %bb
|
||||
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
|
||||
; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
|
||||
|
@ -15,8 +15,8 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in,
|
||||
; GCN-LABEL: test_iglp_opt_mfma_gemm:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
; GCN-NEXT: ; iglp_opt mask(0x00000000)
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -153,8 +153,8 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
|
||||
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -289,8 +289,8 @@ define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias
|
||||
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GCN-NEXT: ; iglp_opt mask(0x00000000)
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
|
||||
|
@ -6,9 +6,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
|
||||
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
|
||||
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v40, 5, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_nc_u32_e32 v32, s0, v40
|
||||
; GCN-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
|
||||
@ -74,9 +74,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
|
||||
; EXACTCUTOFF: ; %bb.0: ; %entry
|
||||
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
|
||||
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v40, 5, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v32, s0, v40
|
||||
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
|
||||
@ -178,9 +178,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
|
||||
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
|
||||
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v16, 5, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_nc_u32_e32 v17, s0, v16
|
||||
; GCN-NEXT: v_add_nc_u32_e32 v16, s1, v16
|
||||
@ -260,9 +260,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
|
||||
; EXACTCUTOFF: ; %bb.0: ; %entry
|
||||
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
|
||||
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v16, 5, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v17, s0, v16
|
||||
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v16, s1, v16
|
||||
|
@ -8,10 +8,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
|
||||
; GCN-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v48, 0
|
||||
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v28, 4, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v28
|
||||
; GCN-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28
|
||||
@ -60,10 +60,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
|
||||
; EXACTCUTOFF: ; %bb.0: ; %entry
|
||||
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v48, 0
|
||||
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v28, 4, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
|
||||
; EXACTCUTOFF-NEXT: s_wait_kmcnt 0x0
|
||||
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v0, s0, v28
|
||||
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28
|
||||
|
@ -7,8 +7,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MINREG-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
; GCN-MINREG: ; %bb.0: ; %entry
|
||||
; GCN-MINREG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-MINREG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-MINREG-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-MINREG-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v1, 2.0
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -140,8 +140,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-MAXOCC-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
; GCN-MAXOCC: ; %bb.0: ; %entry
|
||||
; GCN-MAXOCC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-MAXOCC-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-MAXOCC-NEXT: v_and_b32_e32 v1, 0x1ff80, v0
|
||||
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
; GCN-MAXOCC-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -274,8 +274,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-ILP-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
; GCN-ILP: ; %bb.0: ; %entry
|
||||
; GCN-ILP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-ILP-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-ILP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-ILP-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-ILP-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
; GCN-ILP-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
; GCN-ILP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -469,8 +469,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MINREG-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave_split_region:
|
||||
; GCN-MINREG: ; %bb.0: ; %entry
|
||||
; GCN-MINREG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-MINREG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-MINREG-NEXT: v_lshlrev_b32_e32 v2, 7, v0
|
||||
; GCN-MINREG-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-MINREG-NEXT: v_and_b32_e32 v2, 0x1ff80, v0
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
; GCN-MINREG-NEXT: v_mov_b32_e32 v0, 2.0
|
||||
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -604,8 +604,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-MAXOCC-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave_split_region:
|
||||
; GCN-MAXOCC: ; %bb.0: ; %entry
|
||||
; GCN-MAXOCC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-MAXOCC-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v3, 7, v0
|
||||
; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-MAXOCC-NEXT: v_and_b32_e32 v3, 0x1ff80, v0
|
||||
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
; GCN-MAXOCC-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -739,8 +739,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave_spl
|
||||
; GCN-ILP-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave_split_region:
|
||||
; GCN-ILP: ; %bb.0: ; %entry
|
||||
; GCN-ILP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-ILP-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-ILP-NEXT: v_lshlrev_b32_e32 v2, 7, v0
|
||||
; GCN-ILP-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-ILP-NEXT: v_and_b32_e32 v2, 0x1ff80, v0
|
||||
; GCN-ILP-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-ILP-NEXT: v_mov_b32_e32 v1, 2.0
|
||||
; GCN-ILP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
@ -621,8 +621,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_cluster:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
|
||||
; GCN-NEXT: ds_read_b128 a[156:159], v1 offset:112
|
||||
@ -728,8 +728,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr ad
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_MFMA_cluster:
|
||||
; EXACTCUTOFF: ; %bb.0: ; %entry
|
||||
; EXACTCUTOFF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v1, s0, v0
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[156:159], v1 offset:112
|
||||
@ -871,8 +871,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -1005,8 +1005,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
|
||||
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
|
||||
; EXACTCUTOFF: ; %bb.0: ; %entry
|
||||
; EXACTCUTOFF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
@ -1202,7 +1202,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
|
||||
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
|
||||
; GCN-NEXT: v_mov_b32_e32 v7, 0x32a5705f
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v4, s0, v3
|
||||
; GCN-NEXT: v_rndne_f32_e32 v5, v4
|
||||
@ -1212,7 +1212,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; GCN-NEXT: v_add_f32_e32 v4, v6, v4
|
||||
; GCN-NEXT: v_exp_f32_e32 v4, v4
|
||||
; GCN-NEXT: v_cvt_i32_f32_e32 v5, v5
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; GCN-NEXT: v_add_u32_e32 v1, s6, v0
|
||||
; GCN-NEXT: ds_read_b128 a[124:127], v1 offset:112
|
||||
; GCN-NEXT: ds_read_b128 a[120:123], v1 offset:96
|
||||
@ -1387,7 +1387,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
|
||||
; EXACTCUTOFF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
|
||||
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v7, 0x32a5705f
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; EXACTCUTOFF-NEXT: v_mul_f32_e32 v4, s0, v3
|
||||
; EXACTCUTOFF-NEXT: v_rndne_f32_e32 v5, v4
|
||||
@ -1397,7 +1397,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
|
||||
; EXACTCUTOFF-NEXT: v_add_f32_e32 v4, v6, v4
|
||||
; EXACTCUTOFF-NEXT: v_exp_f32_e32 v4, v4
|
||||
; EXACTCUTOFF-NEXT: v_cvt_i32_f32_e32 v5, v5
|
||||
; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
|
||||
; EXACTCUTOFF-NEXT: v_add_u32_e32 v1, s6, v0
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[124:127], v1 offset:112
|
||||
; EXACTCUTOFF-NEXT: ds_read_b128 a[120:123], v1 offset:96
|
||||
|
@ -208,11 +208,11 @@ define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
|
||||
;
|
||||
; GFX11-LABEL: dpp_test1:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
|
||||
; GFX11-NEXT: ds_load_b32 v1, v0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_barrier
|
||||
|
@ -146,8 +146,8 @@ define void @mubuf_clause(ptr addrspace(5) noalias nocapture readonly %arg, ptr
|
||||
; GCN-LABEL: mubuf_clause:
|
||||
; GCN: ; %bb.0: ; %bb
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v31
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v31
|
||||
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff0, v2
|
||||
; GCN-NEXT: v_add_u32_e32 v0, v0, v2
|
||||
; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:12
|
||||
; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
|
||||
@ -205,8 +205,8 @@ define void @mubuf_clause(ptr addrspace(5) noalias nocapture readonly %arg, ptr
|
||||
; GCN-SCRATCH-LABEL: mubuf_clause:
|
||||
; GCN-SCRATCH: ; %bb.0: ; %bb
|
||||
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-SCRATCH-NEXT: v_and_b32_e32 v2, 0x3ff, v31
|
||||
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v18, 4, v2
|
||||
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
|
||||
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
|
||||
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
|
||||
; GCN-SCRATCH-NEXT: s_clause 0x3
|
||||
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off
|
||||
|
Loading…
x
Reference in New Issue
Block a user