[AMDGPU][SIInsertWaitCnt] Optimize loadcnt insertion at function boundaries (#169647)

On GFX12+, GLOBAL_INV increments the loadcnt counter but does not write
results to any VGPRs. Previously, we unconditionally inserted
s_wait_loadcnt 0 at function returns even when the only pending loadcnt
was from GLOBAL_INV instructions.

This patch optimizes waitcnt insertion by skipping the loadcnt wait at
function boundaries when no VGPR-writing loads are pending. GLOBAL_INV is
now tracked as its own GLOBAL_INV_ACCESS event, so this is determined by
checking whether a VMEM_ACCESS event is pending - if not, the pending
loadcnt must be from non-VGPR-writing instructions like GLOBAL_INV.

The optimization is limited to GFX12+ targets, which have GLOBAL_INV
and use the extended wait count instructions.

This is a follow-up optimization to PR #135340 which added tracking for
GLOBAL_INV in the waitcnt pass.
This commit is contained in:
Pankaj Dwivedi 2025-12-17 17:53:00 +05:30 committed by GitHub
parent 06e4728f83
commit 28d4e33b65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 157 additions and 1405 deletions

View File

@ -147,6 +147,7 @@ struct HardwareLimits {
DECL(VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
DECL(GLOBAL_INV_ACCESS) /* GLOBAL_INV (gfx12+ only) */ \
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
DECL(VMEM_GROUP) /* vmem group */ \
@ -402,7 +403,7 @@ public:
assert(ST);
static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = {
eventMask({VMEM_ACCESS}),
eventMask({VMEM_ACCESS, GLOBAL_INV_ACCESS}),
eventMask({LDS_ACCESS, GDS_ACCESS}),
eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@ -536,7 +537,8 @@ public:
switch (Inst.getOpcode()) {
// FIXME: GLOBAL_INV needs to be tracked with xcnt too.
case AMDGPU::GLOBAL_INV:
return VMEM_ACCESS; // tracked using loadcnt
return GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't write
// VGPRs
case AMDGPU::GLOBAL_WB:
case AMDGPU::GLOBAL_WBINV:
return VMEM_WRITE_ACCESS; // tracked using storecnt
@ -1377,6 +1379,20 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
if ((T == Context->SmemAccessCounter && hasPendingEvent(SMEM_ACCESS)) ||
(T == X_CNT && hasPendingEvent(SMEM_GROUP)))
return true;
// GLOBAL_INV completes in-order with other LOAD_CNT events (VMEM_ACCESS),
// so having GLOBAL_INV_ACCESS mixed with other LOAD_CNT events doesn't cause
// out-of-order completion.
if (T == LOAD_CNT) {
unsigned Events = hasPendingEvent(T);
// Remove GLOBAL_INV_ACCESS from the event mask before checking for mixed
// events
Events &= ~(1 << GLOBAL_INV_ACCESS);
// Return true only if there are still multiple event types after removing
// GLOBAL_INV
return Events & (Events - 1);
}
return hasMixedPendingEvents(T);
}
@ -1946,7 +1962,16 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
Opc == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
AMDGPU::Waitcnt AllZeroWait =
WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
// On GFX12+, if LOAD_CNT is pending but no VGPRs are waiting for loads
// (e.g., only GLOBAL_INV is pending), we can skip waiting on loadcnt.
// GLOBAL_INV increments loadcnt but doesn't write to VGPRs, so there's
// no need to wait for it at function boundaries.
if (ST->hasExtendedWaitCounts() &&
!ScoreBrackets.hasPendingEvent(VMEM_ACCESS))
AllZeroWait.LoadCnt = ~0u;
Wait = Wait.combined(AllZeroWait);
}
// In dynamic VGPR mode, we want to release the VGPRs before the wave exits.
// Technically the hardware will do this on its own if we don't, but that

View File

@ -22,7 +22,6 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f32:
@ -95,7 +94,6 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: ds_max_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f32:
@ -168,7 +166,6 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f64:
@ -245,7 +242,6 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f64:
@ -322,7 +318,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@ -469,7 +464,6 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@ -630,7 +624,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@ -786,7 +779,6 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@ -917,7 +909,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@ -1060,7 +1051,6 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@ -1220,7 +1210,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@ -1374,7 +1363,6 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@ -1507,7 +1495,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@ -1664,7 +1651,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@ -1838,7 +1824,6 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@ -2005,7 +1990,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:

View File

@ -22,7 +22,6 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f32:
@ -95,7 +94,6 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) {
; GFX12-NEXT: ds_min_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f32:
@ -168,7 +166,6 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: ds_min_num_rtn_f64 v[0:1], v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f64:
@ -245,7 +242,6 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) {
; GFX12-NEXT: ds_min_num_f64 v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f64:
@ -322,7 +318,6 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@ -469,7 +464,6 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@ -630,7 +624,6 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@ -786,7 +779,6 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
@ -917,7 +909,6 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@ -1060,7 +1051,6 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@ -1220,7 +1210,6 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@ -1374,7 +1363,6 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
@ -1507,7 +1495,6 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_m
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@ -1664,7 +1651,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@ -1838,7 +1824,6 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
; GFX12-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@ -2005,7 +1990,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:

View File

@ -1803,7 +1803,6 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
@ -1841,7 +1840,6 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
@ -1881,7 +1879,6 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
@ -2127,7 +2124,6 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
@ -2165,7 +2161,6 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
@ -2207,7 +2202,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0

View File

@ -1232,7 +1232,6 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inr
; GFX12-NEXT: global_atomic_add_u32 v0, v1, v0, s[2:3] offset:16380 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
%result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
@ -1280,7 +1279,6 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
%result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
@ -1322,7 +1320,6 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(ptr addrspace(1) %pt
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off offset:16380 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
%result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
@ -1367,7 +1364,6 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
%result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
@ -1418,7 +1414,6 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) in
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
%result = atomicrmw add ptr addrspace(1) %gep, i32 2 syncscope("agent") seq_cst
@ -1463,7 +1458,6 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v0, v[1:2], s[2:3] offset:16380 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
%result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
@ -1513,7 +1507,6 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[3:4], v[1:2], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
%result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
@ -1556,7 +1549,6 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(ptr addrspace(1) %ptr,
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[3:4], off offset:16380 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095
%result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
@ -1601,7 +1593,6 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296
%result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
@ -1655,7 +1646,6 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(ptr addrspace(1) inre
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset
%result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst

View File

@ -111,7 +111,6 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
; GFX1200-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: global_inv scope:SCOPE_SYS
; GFX1200-NEXT: s_wait_loadcnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
%res = atomicrmw fadd ptr %addr, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret float %res
@ -215,7 +214,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
; GFX1200-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: global_inv scope:SCOPE_SE
; GFX1200-NEXT: s_wait_loadcnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret float %res
@ -350,7 +348,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX1200-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_SE
; GFX1200-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1200-NEXT: global_inv scope:SCOPE_SE
; GFX1200-NEXT: s_wait_loadcnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
ret void
@ -442,7 +439,6 @@ define float @no_unsafe(ptr %addr, float %val) {
; GFX1200-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: global_inv scope:SCOPE_SE
; GFX1200-NEXT: s_wait_loadcnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst
ret float %res

View File

@ -600,7 +600,6 @@ define i32 @global_atomic_usub_cond(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond:
@ -638,7 +637,6 @@ define i32 @global_atomic_usub_cond(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_cond ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4
ret i32 %ret
@ -684,7 +682,6 @@ define i32 @global_atomic_usub_cond_offset(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond_offset:
@ -723,7 +720,6 @@ define i32 @global_atomic_usub_cond_offset(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_cond ptr addrspace(1) %gep, i32 %data syncscope("agent") seq_cst, align 4
@ -765,7 +761,6 @@ define void @global_atomic_usub_cond_nortn(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond_nortn:
@ -802,7 +797,6 @@ define void @global_atomic_usub_cond_nortn(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_cond ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4
ret void
@ -848,7 +842,6 @@ define void @global_atomic_usub_cond_offset_nortn(ptr addrspace(1) %ptr, i32 %da
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v[0:1], v2, off offset:4096 scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond_offset_nortn:
@ -887,7 +880,6 @@ define void @global_atomic_usub_cond_offset_nortn(ptr addrspace(1) %ptr, i32 %da
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v[0:1], v2, off offset:4096 scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_cond ptr addrspace(1) %gep, i32 %data syncscope("agent") seq_cst, align 4
@ -1105,7 +1097,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_remote_memory(ptr addrspace(1) %p
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_remote_memory:
@ -1143,7 +1134,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_remote_memory(ptr addrspace(1) %p
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_cond ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret i32 %ret
@ -1185,7 +1175,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory(ptr addrspace
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory:
@ -1223,7 +1212,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory(ptr addrspace
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_cond ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
ret i32 %ret
@ -1265,7 +1253,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_re
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1303,7 +1290,6 @@ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_re
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_cond ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %ret

View File

@ -62,7 +62,6 @@ define i32 @global_atomic_usub_sat(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat:
@ -118,7 +117,6 @@ define i32 @global_atomic_usub_sat(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret i32 %ret
@ -184,7 +182,6 @@ define i32 @global_atomic_usub_sat_offset(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset:
@ -249,7 +246,6 @@ define i32 @global_atomic_usub_sat_offset(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -309,7 +305,6 @@ define void @global_atomic_usub_sat_nortn(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_nortn:
@ -364,7 +359,6 @@ define void @global_atomic_usub_sat_nortn(ptr addrspace(1) %ptr, i32 %data) {
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret void
@ -430,7 +424,6 @@ define void @global_atomic_usub_sat_offset_nortn(ptr addrspace(1) %ptr, i32 %dat
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset_nortn:
@ -495,7 +488,6 @@ define void @global_atomic_usub_sat_offset_nortn(ptr addrspace(1) %ptr, i32 %dat
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off offset:4096 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -899,7 +891,6 @@ define i16 @global_atomic_usub_sat_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_16:
@ -1011,7 +1002,6 @@ define i16 @global_atomic_usub_sat_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v3
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i16 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret i16 %ret
@ -1128,7 +1118,6 @@ define i16 @global_atomic_usub_sat_offset_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset_16:
@ -1241,7 +1230,6 @@ define i16 @global_atomic_usub_sat_offset_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v3
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i16, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i16 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -1353,7 +1341,6 @@ define void @global_atomic_usub_sat_nortn_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-GISEL-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_nortn_16:
@ -1460,7 +1447,6 @@ define void @global_atomic_usub_sat_nortn_16(ptr addrspace(1) %ptr, i16 %data) {
; GFX12-SDAG-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i16 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret void
@ -1573,7 +1559,6 @@ define void @global_atomic_usub_sat_offset_nortn_16(ptr addrspace(1) %ptr, i16 %
; GFX12-GISEL-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset_nortn_16:
@ -1682,7 +1667,6 @@ define void @global_atomic_usub_sat_offset_nortn_16(ptr addrspace(1) %ptr, i16 %
; GFX12-SDAG-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i16, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i16 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -2322,7 +2306,6 @@ define i8 @global_atomic_usub_sat_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_8:
@ -2441,7 +2424,6 @@ define i8 @global_atomic_usub_sat_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v3
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i8 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret i8 %ret
@ -2574,7 +2556,6 @@ define i8 @global_atomic_usub_sat_offset_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset_8:
@ -2693,7 +2674,6 @@ define i8 @global_atomic_usub_sat_offset_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v3
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i8 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -2821,7 +2801,6 @@ define void @global_atomic_usub_sat_nortn_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-GISEL-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_nortn_8:
@ -2935,7 +2914,6 @@ define void @global_atomic_usub_sat_nortn_8(ptr addrspace(1) %ptr, i8 %data) {
; GFX12-SDAG-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i8 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret void
@ -3062,7 +3040,6 @@ define void @global_atomic_usub_sat_offset_nortn_8(ptr addrspace(1) %ptr, i8 %da
; GFX12-GISEL-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat_offset_nortn_8:
@ -3176,7 +3153,6 @@ define void @global_atomic_usub_sat_offset_nortn_8(ptr addrspace(1) %ptr, i8 %da
; GFX12-SDAG-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %ptr, i64 1024
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i8 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
@ -3791,7 +3767,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_remote_memory(ptr addrspace(1) %pt
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat__amdgpu_no_remote_memory:
@ -3847,7 +3822,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_remote_memory(ptr addrspace(1) %pt
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
ret i32 %ret
@ -3907,7 +3881,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_fine_grained_memory(ptr addrspace(
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat__amdgpu_no_fine_grained_memory:
@ -3963,7 +3936,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_fine_grained_memory(ptr addrspace(
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
ret i32 %ret
@ -4023,7 +3995,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_fine_grained_memory__amdgpu_no_rem
; GFX12-GISEL-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_atomic_usub_sat__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -4079,7 +4050,6 @@ define i32 @global_atomic_usub_sat__amdgpu_no_fine_grained_memory__amdgpu_no_rem
; GFX12-SDAG-NEXT: global_atomic_sub_clamp_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %ret

View File

@ -28,7 +28,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -213,7 +212,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -397,7 +395,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -786,7 +783,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:
@ -987,7 +983,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory:
@ -1181,7 +1176,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset(ptr addrspace(7)
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset:
@ -1399,7 +1393,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory:
@ -1617,7 +1610,6 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
@ -1858,7 +1850,6 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory:
@ -2100,7 +2091,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory:
@ -2370,7 +2360,6 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg
; GFX12-NEXT: s_cbranch_execnz .LBB10_3
; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -2851,7 +2840,6 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remot
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory:
@ -3112,7 +3100,6 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB12_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -3373,7 +3360,6 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -3422,7 +3408,6 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -3811,7 +3796,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -3859,7 +3843,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -4272,7 +4255,6 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -4355,7 +4337,6 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -5053,7 +5034,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -5112,7 +5092,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -5573,7 +5552,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -5631,7 +5609,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -6115,7 +6092,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -6209,7 +6185,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -6921,7 +6896,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -7163,7 +7137,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -7407,7 +7380,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -7875,7 +7847,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset:
@ -8133,7 +8104,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace(
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset:
@ -8387,7 +8357,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory:
@ -8645,7 +8614,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory:
@ -8903,7 +8871,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -9330,7 +9297,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -9764,7 +9730,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -10473,7 +10438,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset(ptr add
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset:
@ -10900,7 +10864,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset(ptr addrspace
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset:
@ -11314,7 +11277,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory:
@ -11741,7 +11703,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_re
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory:
@ -12155,7 +12116,6 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -12574,7 +12534,6 @@ define float @buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory:

View File

@ -28,7 +28,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:
@ -200,7 +199,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory:
@ -388,7 +386,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -752,7 +749,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory:
@ -1003,7 +999,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1201,7 +1196,6 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory:
@ -1383,7 +1377,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory:
@ -1602,7 +1595,6 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg
; GFX12-NEXT: s_cbranch_execnz .LBB7_3
; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -1985,7 +1977,6 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remot
; GFX12-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory:
@ -2251,7 +2242,6 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -2453,7 +2443,6 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2504,7 +2493,6 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2912,7 +2900,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2962,7 +2949,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -3394,7 +3380,6 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -3480,7 +3465,6 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -4194,7 +4178,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4253,7 +4236,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4716,7 +4698,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4774,7 +4755,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -5260,7 +5240,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -5354,7 +5333,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -6089,7 +6067,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no
; GFX12-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -6399,7 +6376,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fin
; GFX12-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -6740,7 +6716,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall
; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -7323,7 +7298,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7377,7 +7351,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7839,7 +7812,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB20_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7889,7 +7861,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB20_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -8378,7 +8349,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -8468,7 +8438,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -9182,7 +9151,6 @@ define float @buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory:

View File

@ -28,7 +28,6 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:
@ -200,7 +199,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen offset:1024
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory:
@ -388,7 +386,6 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -752,7 +749,6 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory:
@ -1003,7 +999,6 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1201,7 +1196,6 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory:
@ -1383,7 +1377,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory:
@ -1602,7 +1595,6 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg
; GFX12-NEXT: s_cbranch_execnz .LBB7_3
; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -1985,7 +1977,6 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remot
; GFX12-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory:
@ -2251,7 +2242,6 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -2453,7 +2443,6 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2504,7 +2493,6 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2912,7 +2900,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -2962,7 +2949,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory:
@ -3394,7 +3380,6 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -3480,7 +3465,6 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -4194,7 +4178,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, s4, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4253,7 +4236,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, s4, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4716,7 +4698,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -4774,7 +4755,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB14_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory:
@ -5260,7 +5240,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -5354,7 +5333,6 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -6089,7 +6067,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no
; GFX12-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -6399,7 +6376,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fin
; GFX12-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory:
@ -6740,7 +6716,6 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall
; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -7323,7 +7298,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7377,7 +7351,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7839,7 +7812,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB20_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -7889,7 +7861,6 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fi
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB20_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory:
@ -8378,7 +8349,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf
; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -8468,7 +8438,6 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf
; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory:
@ -9182,7 +9151,6 @@ define float @buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory:

View File

@ -20,7 +20,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_fin
; GFX12-NEXT: buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_fine_grained_memory:
@ -71,7 +70,6 @@ define void @buffer_fat_ptr_agent_atomic_usub_cond_noret_u32__offset__amdgpu_no_
; GFX12-NEXT: buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_cond_noret_u32__offset__amdgpu_no_fine_grained_memory:
@ -121,7 +119,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_rem
; GFX12-NEXT: buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_remote_memory:
@ -172,7 +169,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_fin
; GFX12-NEXT: buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_cond_ret_u32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -228,7 +224,6 @@ define i32 @buffer_fat_ptr_system_atomic_usub_cond_ret_u32__offset__amdgpu_no_fi
; GFX12-NEXT: buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_system_atomic_usub_cond_ret_u32__offset__amdgpu_no_fine_grained_memory:

View File

@ -20,7 +20,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_fine
; GFX12-NEXT: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_fine_grained_memory:
@ -77,7 +76,6 @@ define void @buffer_fat_ptr_agent_atomic_usub_sat_noret_u32__offset__amdgpu_no_f
; GFX12-NEXT: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_sat_noret_u32__offset__amdgpu_no_fine_grained_memory:
@ -133,7 +131,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_remo
; GFX12-NEXT: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_remote_memory:
@ -190,7 +187,6 @@ define i32 @buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_fine
; GFX12-NEXT: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_usub_sat_ret_u32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -252,7 +248,6 @@ define i32 @buffer_fat_ptr_system_atomic_usub_sat_ret_u32__offset__amdgpu_no_fin
; GFX12-NEXT: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_fat_ptr_system_atomic_usub_sat_ret_u32__offset__amdgpu_no_fine_grained_memory:

View File

@ -26,7 +26,6 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amd
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -203,7 +202,6 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -390,7 +388,6 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -587,7 +584,6 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -794,7 +790,6 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -1012,7 +1007,6 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -1237,7 +1231,6 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -1427,7 +1420,6 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -1647,7 +1639,6 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val)
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote:
@ -1812,7 +1803,6 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
@ -1964,7 +1954,6 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -2182,7 +2171,6 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
@ -2351,7 +2339,6 @@ define float @flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -2528,7 +2515,6 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2715,7 +2701,6 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2912,7 +2897,6 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -3119,7 +3103,6 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -3337,7 +3320,6 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -3562,7 +3544,6 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -3752,7 +3733,6 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -3973,7 +3953,6 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -4163,7 +4142,6 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
@ -4383,7 +4361,6 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ig
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
@ -4548,7 +4525,6 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_i
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
@ -4706,7 +4682,6 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
@ -4871,7 +4846,6 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
@ -5029,7 +5003,6 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
@ -5206,7 +5179,6 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
@ -5413,7 +5385,6 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
@ -5562,7 +5533,6 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory:
@ -5753,7 +5723,6 @@ define double @flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
@ -6159,7 +6128,6 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB31_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB31_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -6620,7 +6588,6 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB32_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB32_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -7078,7 +7045,6 @@ define void @flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: .LBB33_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB33_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -7511,7 +7477,6 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB34_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB34_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -7960,7 +7925,6 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB35_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB35_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -8432,7 +8396,6 @@ define half @flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
@ -8477,7 +8440,6 @@ define half @flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
@ -8805,7 +8767,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8851,7 +8812,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9189,7 +9149,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -9235,7 +9194,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -9572,7 +9530,6 @@ define void @flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
@ -9615,7 +9572,6 @@ define void @flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
@ -9932,7 +9888,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9976,7 +9931,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10303,7 +10257,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10347,7 +10300,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10663,7 +10615,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB42_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -10695,7 +10646,6 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB42_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -10938,7 +10888,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -10972,7 +10921,6 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -11235,7 +11183,6 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11282,7 +11229,6 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11622,7 +11568,6 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11667,7 +11612,6 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12013,7 +11957,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
@ -12067,7 +12010,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
@ -12466,7 +12408,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12523,7 +12464,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12933,7 +12873,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -12990,7 +12929,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -13399,7 +13337,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13454,7 +13391,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13851,7 +13787,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -13906,7 +13841,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -14294,7 +14228,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -14338,7 +14271,6 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -14665,7 +14597,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB52_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -14707,7 +14638,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB52_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -15037,7 +14967,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
@ -15089,7 +15018,6 @@ define void @flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
@ -15478,7 +15406,6 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15536,7 +15463,6 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15947,7 +15873,6 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16003,7 +15928,6 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16364,7 +16288,6 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
@ -16552,7 +16475,6 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16743,7 +16665,6 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -16947,7 +16868,6 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
@ -17127,7 +17047,6 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17314,7 +17233,6 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17518,7 +17436,6 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17712,7 +17629,6 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17901,7 +17817,6 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(ptr
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
@ -18089,7 +18004,6 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr %pt
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
@ -18269,7 +18183,6 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -18457,7 +18370,6 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__
; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -18641,7 +18553,6 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -18964,7 +18875,6 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19290,7 +19200,6 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -19630,7 +19539,6 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory(
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -19943,7 +19851,6 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -20263,7 +20170,6 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -20603,7 +20509,6 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -20932,7 +20837,6 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -21254,7 +21158,6 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory(
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
@ -21577,7 +21480,6 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr %p
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
@ -21890,7 +21792,6 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -22213,7 +22114,6 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory_
; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:

View File

@ -26,7 +26,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@ -169,7 +168,6 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -318,7 +316,6 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -483,7 +480,6 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@ -624,7 +620,6 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -772,7 +767,6 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -940,7 +934,6 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1092,7 +1085,6 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1242,7 +1234,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
@ -1435,7 +1426,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amd
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1582,7 +1572,6 @@ define float @flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -1725,7 +1714,6 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -1874,7 +1862,6 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2039,7 +2026,6 @@ define void @flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -2180,7 +2166,6 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2328,7 +2313,6 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2496,7 +2480,6 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2648,7 +2631,6 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2848,7 +2830,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@ -3206,7 +3187,6 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB19_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB19_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -3618,7 +3598,6 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB20_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB20_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -4027,7 +4006,6 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: .LBB21_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB21_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[4:5], v[0:1]
@ -4416,7 +4394,6 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB22_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB22_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[2:3], v[6:7]
@ -4821,7 +4798,6 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB23_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB23_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[2:3], v[6:7]
@ -5260,7 +5236,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory:
@ -5684,7 +5659,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -6065,7 +6039,6 @@ define half @flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
@ -6112,7 +6085,6 @@ define half @flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
@ -6460,7 +6432,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6509,7 +6480,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6869,7 +6839,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -6918,7 +6887,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -7274,7 +7242,6 @@ define void @flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
@ -7320,7 +7287,6 @@ define void @flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
@ -7656,7 +7622,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -7704,7 +7669,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8052,7 +8016,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -8100,7 +8063,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -8437,7 +8399,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -8473,7 +8434,6 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -8739,7 +8699,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -8774,7 +8733,6 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -9050,7 +9008,6 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9100,7 +9057,6 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9461,7 +9417,6 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9510,7 +9465,6 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9873,7 +9827,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
@ -9927,7 +9880,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
@ -10327,7 +10279,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10384,7 +10335,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10795,7 +10745,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10852,7 +10801,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11261,7 +11209,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
@ -11313,7 +11260,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
@ -11700,7 +11646,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11755,7 +11700,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12153,7 +12097,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -12208,7 +12151,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -12597,7 +12539,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12641,7 +12582,6 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12969,7 +12909,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -13011,7 +12950,6 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -13346,7 +13284,6 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13404,7 +13341,6 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13816,7 +13752,6 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13872,7 +13807,6 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14253,7 +14187,6 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memo
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory:
@ -14489,7 +14422,6 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14728,7 +14660,6 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -14981,7 +14912,6 @@ define void @flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory:
@ -15208,7 +15138,6 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15442,7 +15371,6 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB51_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -15697,7 +15625,6 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15938,7 +15865,6 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16199,7 +16125,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16251,7 +16176,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16646,7 +16570,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16698,7 +16621,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17096,7 +17018,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17148,7 +17069,6 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17559,7 +17479,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17610,7 +17529,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17991,7 +17909,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18042,7 +17959,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18430,7 +18346,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18481,7 +18396,6 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18895,7 +18809,6 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18948,7 +18861,6 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19346,7 +19258,6 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19398,7 +19309,6 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:

View File

@ -26,7 +26,6 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@ -169,7 +168,6 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -318,7 +316,6 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -483,7 +480,6 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@ -624,7 +620,6 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -772,7 +767,6 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -940,7 +934,6 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1092,7 +1085,6 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1242,7 +1234,6 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory:
@ -1435,7 +1426,6 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amd
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1582,7 +1572,6 @@ define float @flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -1725,7 +1714,6 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -1874,7 +1862,6 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2039,7 +2026,6 @@ define void @flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -2180,7 +2166,6 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2328,7 +2313,6 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2496,7 +2480,6 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2648,7 +2631,6 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2 offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2848,7 +2830,6 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@ -3206,7 +3187,6 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB19_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB19_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -3618,7 +3598,6 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB20_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB20_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -4027,7 +4006,6 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX12-NEXT: .LBB21_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB21_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[4:5], v[0:1]
@ -4416,7 +4394,6 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB22_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB22_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[2:3], v[6:7]
@ -4821,7 +4798,6 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX12-NEXT: .LBB23_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB23_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[2:3], v[6:7]
@ -5260,7 +5236,6 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory:
@ -5684,7 +5659,6 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -6065,7 +6039,6 @@ define half @flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
@ -6112,7 +6085,6 @@ define half @flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
@ -6460,7 +6432,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6509,7 +6480,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6869,7 +6839,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -6918,7 +6887,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -7274,7 +7242,6 @@ define void @flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
@ -7320,7 +7287,6 @@ define void @flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
@ -7656,7 +7622,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -7704,7 +7669,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8052,7 +8016,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -8100,7 +8063,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -8437,7 +8399,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -8473,7 +8434,6 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -8739,7 +8699,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -8774,7 +8733,6 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -9050,7 +9008,6 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9100,7 +9057,6 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9461,7 +9417,6 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9510,7 +9465,6 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9873,7 +9827,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
@ -9927,7 +9880,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
@ -10327,7 +10279,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10384,7 +10335,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10795,7 +10745,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10852,7 +10801,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11261,7 +11209,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
@ -11313,7 +11260,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(pt
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
@ -11700,7 +11646,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11755,7 +11700,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12153,7 +12097,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -12208,7 +12151,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -12597,7 +12539,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12641,7 +12582,6 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12969,7 +12909,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -13011,7 +12950,6 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -13346,7 +13284,6 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13404,7 +13341,6 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13816,7 +13752,6 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13872,7 +13807,6 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14253,7 +14187,6 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memo
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory:
@ -14489,7 +14422,6 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14728,7 +14660,6 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -14981,7 +14912,6 @@ define void @flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory:
@ -15208,7 +15138,6 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15442,7 +15371,6 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB51_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -15697,7 +15625,6 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15938,7 +15865,6 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16199,7 +16125,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16251,7 +16176,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_m
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16646,7 +16570,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16698,7 +16621,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17096,7 +17018,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17148,7 +17069,6 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17559,7 +17479,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17610,7 +17529,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17991,7 +17909,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18042,7 +17959,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18430,7 +18346,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18481,7 +18396,6 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18895,7 +18809,6 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18948,7 +18861,6 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19346,7 +19258,6 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19398,7 +19309,6 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:

View File

@ -43,7 +43,6 @@ define float @flat_agent_atomic_fsub_ret_f32(ptr %ptr, float %val) #0 {
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32:
@ -239,7 +238,6 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos:
@ -439,7 +437,6 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg:
@ -652,7 +649,6 @@ define void @flat_agent_atomic_fsub_noret_f32(ptr %ptr, float %val) #0 {
; GFX12-NEXT: s_cbranch_execnz .LBB3_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32:
@ -838,7 +834,6 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va
; GFX12-NEXT: s_cbranch_execnz .LBB4_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos:
@ -1031,7 +1026,6 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg:
@ -1246,7 +1240,6 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos:
@ -1447,7 +1440,6 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos:
@ -1648,7 +1640,6 @@ define float @flat_agent_atomic_fsub_ret_f32__ftz(ptr %ptr, float %val) #1 {
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__ftz:
@ -1844,7 +1835,6 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz:
@ -2044,7 +2034,6 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz:
@ -2257,7 +2246,6 @@ define void @flat_agent_atomic_fsub_noret_f32__ftz(ptr %ptr, float %val) #1 {
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__ftz:
@ -2443,7 +2431,6 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa
; GFX12-NEXT: s_cbranch_execnz .LBB12_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz:
@ -2636,7 +2623,6 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
; GFX12-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz:
@ -2851,7 +2837,6 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz:
@ -3052,7 +3037,6 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz:
@ -3278,7 +3262,6 @@ define double @flat_agent_atomic_fsub_ret_f64(ptr %ptr, double %val) #0 {
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64:
@ -3661,7 +3644,6 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
; GFX12-NEXT: .LBB17_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB17_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -4107,7 +4089,6 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
; GFX12-NEXT: .LBB18_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB18_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[0:1], v[4:5]
@ -4550,7 +4531,6 @@ define void @flat_agent_atomic_fsub_noret_f64(ptr %ptr, double %val) #0 {
; GFX12-NEXT: .LBB19_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB19_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -4970,7 +4950,6 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
; GFX12-NEXT: .LBB20_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB20_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -5406,7 +5385,6 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
; GFX12-NEXT: .LBB21_2: ; %atomicrmw.phi
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
; GFX12-NEXT: .LBB21_3: ; %atomicrmw.global
; GFX12-NEXT: flat_load_b64 v[6:7], v[0:1]
@ -5865,7 +5843,6 @@ define half @flat_agent_atomic_fsub_ret_f16(ptr %ptr, half %val) #0 {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_f16:
@ -5910,7 +5887,6 @@ define half @flat_agent_atomic_fsub_ret_f16(ptr %ptr, half %val) #0 {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16:
@ -6238,7 +6214,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos:
@ -6284,7 +6259,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos:
@ -6622,7 +6596,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg:
@ -6668,7 +6641,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg:
@ -7005,7 +6977,6 @@ define void @flat_agent_atomic_fsub_noret_f16(ptr %ptr, half %val) #0 {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB25_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_f16:
@ -7048,7 +7019,6 @@ define void @flat_agent_atomic_fsub_noret_f16(ptr %ptr, half %val) #0 {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB25_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16:
@ -7365,7 +7335,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos:
@ -7409,7 +7378,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos:
@ -7736,7 +7704,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg:
@ -7780,7 +7747,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg:
@ -8097,7 +8063,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
@ -8131,7 +8096,6 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
@ -8380,7 +8344,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
@ -8412,7 +8375,6 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
@ -8668,7 +8630,6 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos:
@ -8715,7 +8676,6 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos:
@ -9055,7 +9015,6 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos:
@ -9100,7 +9059,6 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos:
@ -9446,7 +9404,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16(ptr %ptr, bfloat %val) #0 {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_bf16:
@ -9500,7 +9457,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16(ptr %ptr, bfloat %val) #0 {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16:
@ -9899,7 +9855,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos:
@ -9956,7 +9911,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos:
@ -10366,7 +10320,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg:
@ -10423,7 +10376,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg:
@ -10831,7 +10783,6 @@ define void @flat_agent_atomic_fsub_noret_bf16(ptr %ptr, bfloat %val) #0 {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_bf16:
@ -10883,7 +10834,6 @@ define void @flat_agent_atomic_fsub_noret_bf16(ptr %ptr, bfloat %val) #0 {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16:
@ -11269,7 +11219,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB36_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos:
@ -11324,7 +11273,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB36_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos:
@ -11721,7 +11669,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB37_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg:
@ -11776,7 +11723,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB37_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg:
@ -12164,7 +12110,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
@ -12208,7 +12153,6 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
@ -12535,7 +12479,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
@ -12577,7 +12520,6 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
@ -12911,7 +12853,6 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos:
@ -12969,7 +12910,6 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos:
@ -13380,7 +13320,6 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos:
@ -13436,7 +13375,6 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos:
@ -13814,7 +13752,6 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16(ptr %ptr, <2 x half> %val) #
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16:
@ -14033,7 +13970,6 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos:
@ -14255,7 +14191,6 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg:
@ -14490,7 +14425,6 @@ define void @flat_agent_atomic_fsub_noret_v2f16(ptr %ptr, <2 x half> %val) #0 {
; GFX12-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16:
@ -14698,7 +14632,6 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha
; GFX12-NEXT: s_cbranch_execnz .LBB46_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos:
@ -14913,7 +14846,6 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
; GFX12-NEXT: s_cbranch_execnz .LBB47_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg:
@ -15150,7 +15082,6 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos:
@ -15373,7 +15304,6 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos:
@ -15618,7 +15548,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16(ptr %ptr, <2 x bfloat> %v
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_v2bf16:
@ -15670,7 +15599,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16(ptr %ptr, <2 x bfloat> %v
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16:
@ -16065,7 +15993,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -16117,7 +16044,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -16515,7 +16441,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
@ -16567,7 +16492,6 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
@ -16978,7 +16902,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16(ptr %ptr, <2 x bfloat> %val) #0
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_v2bf16:
@ -17029,7 +16952,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16(ptr %ptr, <2 x bfloat> %val) #0
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16:
@ -17410,7 +17332,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB54_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -17461,7 +17382,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB54_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -17849,7 +17769,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
@ -17900,7 +17819,6 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
@ -18314,7 +18232,6 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -18367,7 +18284,6 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -18765,7 +18681,6 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -18817,7 +18732,6 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos:

View File

@ -143,7 +143,6 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset,
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xchg_saddr_i32_rtn:
@ -184,7 +183,6 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_2048(ptr inreg %sbase, i32 %voff
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xchg_saddr_i32_rtn_2048:
@ -226,7 +224,6 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_neg2048(ptr inreg %sbase, i32 %v
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xchg_saddr_i32_rtn_neg2048:
@ -286,7 +283,6 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-LABEL: flat_xchg_saddr_uniform_ptr_in_vgprs_rtn:
@ -303,7 +299,6 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v0, v[2:3], v1 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xchg_saddr_uniform_ptr_in_vgprs_rtn:
@ -355,7 +350,6 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-LABEL: flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset:
@ -372,7 +366,6 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v0, v[2:3], v1 offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset:
@ -571,7 +564,6 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB10_4
; GFX1250-SDAG-NEXT: .LBB10_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB10_5
; GFX1250-SDAG-NEXT: .LBB10_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -618,7 +610,6 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB10_4
; GFX1250-GISEL-NEXT: .LBB10_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB10_5
; GFX1250-GISEL-NEXT: .LBB10_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -753,7 +744,6 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB11_4
; GFX1250-SDAG-NEXT: .LBB11_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB11_5
; GFX1250-SDAG-NEXT: .LBB11_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -803,7 +793,6 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB11_4
; GFX1250-GISEL-NEXT: .LBB11_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB11_5
; GFX1250-GISEL-NEXT: .LBB11_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -1240,7 +1229,6 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-NEXT: flat_atomic_add_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_add_saddr_i32_rtn:
@ -1281,7 +1269,6 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-NEXT: flat_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_add_saddr_i32_rtn_neg128:
@ -1425,7 +1412,6 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB18_4
; GFX1250-SDAG-NEXT: .LBB18_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB18_5
; GFX1250-SDAG-NEXT: .LBB18_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -1472,7 +1458,6 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB18_4
; GFX1250-GISEL-NEXT: .LBB18_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB18_5
; GFX1250-GISEL-NEXT: .LBB18_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -1611,7 +1596,6 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB19_4
; GFX1250-SDAG-NEXT: .LBB19_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB19_5
; GFX1250-SDAG-NEXT: .LBB19_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -1661,7 +1645,6 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB19_4
; GFX1250-GISEL-NEXT: .LBB19_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB19_5
; GFX1250-GISEL-NEXT: .LBB19_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -2130,7 +2113,6 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_sub_saddr_i32_rtn:
@ -2171,7 +2153,6 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_sub_saddr_i32_rtn_neg128:
@ -2315,7 +2296,6 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB26_4
; GFX1250-SDAG-NEXT: .LBB26_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB26_5
; GFX1250-SDAG-NEXT: .LBB26_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -2362,7 +2342,6 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB26_4
; GFX1250-GISEL-NEXT: .LBB26_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB26_5
; GFX1250-GISEL-NEXT: .LBB26_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -2503,7 +2482,6 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB27_4
; GFX1250-SDAG-NEXT: .LBB27_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB27_5
; GFX1250-SDAG-NEXT: .LBB27_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -2553,7 +2531,6 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB27_4
; GFX1250-GISEL-NEXT: .LBB27_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB27_5
; GFX1250-GISEL-NEXT: .LBB27_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -3028,7 +3005,6 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-NEXT: flat_atomic_and_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_and_saddr_i32_rtn:
@ -3069,7 +3045,6 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-NEXT: flat_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_and_saddr_i32_rtn_neg128:
@ -3213,7 +3188,6 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB34_4
; GFX1250-SDAG-NEXT: .LBB34_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB34_5
; GFX1250-SDAG-NEXT: .LBB34_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -3261,7 +3235,6 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB34_4
; GFX1250-GISEL-NEXT: .LBB34_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB34_5
; GFX1250-GISEL-NEXT: .LBB34_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -3401,7 +3374,6 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB35_4
; GFX1250-SDAG-NEXT: .LBB35_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB35_5
; GFX1250-SDAG-NEXT: .LBB35_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -3452,7 +3424,6 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB35_4
; GFX1250-GISEL-NEXT: .LBB35_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB35_5
; GFX1250-GISEL-NEXT: .LBB35_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -3926,7 +3897,6 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i3
; GFX1250-NEXT: flat_atomic_or_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_or_saddr_i32_rtn:
@ -3967,7 +3937,6 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voff
; GFX1250-NEXT: flat_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_or_saddr_i32_rtn_neg128:
@ -4111,7 +4080,6 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB42_4
; GFX1250-SDAG-NEXT: .LBB42_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB42_5
; GFX1250-SDAG-NEXT: .LBB42_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -4159,7 +4127,6 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB42_4
; GFX1250-GISEL-NEXT: .LBB42_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB42_5
; GFX1250-GISEL-NEXT: .LBB42_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -4299,7 +4266,6 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB43_4
; GFX1250-SDAG-NEXT: .LBB43_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB43_5
; GFX1250-SDAG-NEXT: .LBB43_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -4350,7 +4316,6 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB43_4
; GFX1250-GISEL-NEXT: .LBB43_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB43_5
; GFX1250-GISEL-NEXT: .LBB43_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -4824,7 +4789,6 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xor_saddr_i32_rtn:
@ -4865,7 +4829,6 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_xor_saddr_i32_rtn_neg128:
@ -5009,7 +4972,6 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB50_4
; GFX1250-SDAG-NEXT: .LBB50_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB50_5
; GFX1250-SDAG-NEXT: .LBB50_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -5057,7 +5019,6 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB50_4
; GFX1250-GISEL-NEXT: .LBB50_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB50_5
; GFX1250-GISEL-NEXT: .LBB50_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -5197,7 +5158,6 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB51_4
; GFX1250-SDAG-NEXT: .LBB51_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB51_5
; GFX1250-SDAG-NEXT: .LBB51_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
@ -5248,7 +5208,6 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB51_4
; GFX1250-GISEL-NEXT: .LBB51_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB51_5
; GFX1250-GISEL-NEXT: .LBB51_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
@ -9067,7 +9026,6 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffse
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_cmpxchg_saddr_i32_rtn:
@ -9111,7 +9069,6 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GFX950-SDAG-LABEL: flat_cmpxchg_saddr_i32_rtn_neg128:
@ -9262,7 +9219,6 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB90_4
; GFX1250-SDAG-NEXT: .LBB90_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB90_5
; GFX1250-SDAG-NEXT: .LBB90_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_SYS
@ -9311,7 +9267,6 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB90_4
; GFX1250-GISEL-NEXT: .LBB90_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB90_5
; GFX1250-GISEL-NEXT: .LBB90_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_SYS
@ -9461,7 +9416,6 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB91_4
; GFX1250-SDAG-NEXT: .LBB91_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB91_5
; GFX1250-SDAG-NEXT: .LBB91_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_SYS
@ -9513,7 +9467,6 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB91_4
; GFX1250-GISEL-NEXT: .LBB91_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB91_5
; GFX1250-GISEL-NEXT: .LBB91_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_SYS

View File

@ -1935,7 +1935,6 @@ define amdgpu_ps float @atomic_flat_load_saddr_i32(ptr inreg %sbase, i32 %voffse
; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@ -1951,7 +1950,6 @@ define amdgpu_ps float @atomic_flat_load_saddr_i32_immneg128(ptr inreg %sbase, i
; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@ -1968,7 +1966,6 @@ define amdgpu_ps <2 x float> @atomic_flat_load_saddr_i64(ptr inreg %sbase, i32 %
; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@ -1984,7 +1981,6 @@ define amdgpu_ps <2 x float> @atomic_flat_load_saddr_i64_immneg128(ptr inreg %sb
; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset

View File

@ -113,7 +113,6 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: flat_atomic_fadd_f32_rtn_pat:
@ -126,7 +125,6 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
; GFX1250-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
ret float %ret
@ -185,7 +183,6 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
; GFX12-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: local_atomic_fadd_v2f16_rtn:
@ -253,7 +250,6 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
; GFX12-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: local_atomic_fadd_v2bf16_rtn:

View File

@ -1684,7 +1684,6 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
@ -1722,7 +1721,6 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
@ -1762,7 +1760,6 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
@ -1989,7 +1986,6 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
@ -2027,7 +2023,6 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
@ -2069,7 +2064,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0

File diff suppressed because it is too large Load Diff

View File

@ -27,7 +27,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@ -189,7 +188,6 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -353,7 +351,6 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_gr
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -517,7 +514,6 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@ -673,7 +669,6 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -832,7 +827,6 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -992,7 +986,6 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1159,7 +1152,6 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1320,7 +1312,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr addr
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory:
@ -1551,7 +1542,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__a
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1717,7 +1707,6 @@ define float @global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memo
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -1879,7 +1868,6 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2043,7 +2031,6 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2207,7 +2194,6 @@ define void @global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_mem
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -2363,7 +2349,6 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2522,7 +2507,6 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2682,7 +2666,6 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2849,7 +2832,6 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -3033,7 +3015,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@ -3206,7 +3187,6 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -3380,7 +3360,6 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -3553,7 +3532,6 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB21_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@ -3716,7 +3694,6 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB22_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -3882,7 +3859,6 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB23_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -4049,7 +4025,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr add
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory:
@ -4297,7 +4272,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -4488,7 +4462,6 @@ define half @global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
@ -4535,7 +4508,6 @@ define half @global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory:
@ -4933,7 +4905,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -4982,7 +4953,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -5394,7 +5364,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -5443,7 +5412,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -5851,7 +5819,6 @@ define void @global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(p
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
@ -5897,7 +5864,6 @@ define void @global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(p
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory:
@ -6282,7 +6248,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6330,7 +6295,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6728,7 +6692,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -6776,7 +6739,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -7163,7 +7125,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -7199,7 +7160,6 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -7504,7 +7464,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_n
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -7539,7 +7498,6 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_n
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -7852,7 +7810,6 @@ define half @global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -7902,7 +7859,6 @@ define half @global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8315,7 +8271,6 @@ define void @global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8364,7 +8319,6 @@ define void @global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8777,7 +8731,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
@ -8831,7 +8784,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory:
@ -9282,7 +9234,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9339,7 +9290,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9803,7 +9753,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -9860,7 +9809,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10322,7 +10270,6 @@ define void @global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
@ -10374,7 +10321,6 @@ define void @global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory:
@ -10811,7 +10757,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10866,7 +10811,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11315,7 +11259,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11370,7 +11313,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11810,7 +11752,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -11854,7 +11795,6 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12222,7 +12162,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -12264,7 +12203,6 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -12637,7 +12575,6 @@ define bfloat @global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12695,7 +12632,6 @@ define bfloat @global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13160,7 +13096,6 @@ define void @global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13216,7 +13151,6 @@ define void @global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13648,7 +13582,6 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_me
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory:
@ -13941,7 +13874,6 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14236,7 +14168,6 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -14534,7 +14465,6 @@ define void @global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory:
@ -14816,7 +14746,6 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine
; GFX12-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15101,7 +15030,6 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine
; GFX12-NEXT: s_cbranch_execnz .LBB51_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -15396,7 +15324,6 @@ define <2 x half> @global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15693,7 +15620,6 @@ define void @global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fin
; GFX12-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16005,7 +15931,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16057,7 +15982,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16505,7 +16429,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16557,7 +16480,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17007,7 +16929,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17059,7 +16980,6 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17510,7 +17430,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memor
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17561,7 +17480,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memor
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17993,7 +17911,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18044,7 +17961,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18479,7 +18395,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18530,7 +18445,6 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18977,7 +18891,6 @@ define <2 x bfloat> @global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19030,7 +18943,6 @@ define <2 x bfloat> @global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19480,7 +19392,6 @@ define void @global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fi
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19532,7 +19443,6 @@ define void @global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fi
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:

View File

@ -27,7 +27,6 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
@ -189,7 +188,6 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -353,7 +351,6 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_gr
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -517,7 +514,6 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
@ -673,7 +669,6 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -832,7 +827,6 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -992,7 +986,6 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1159,7 +1152,6 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -1320,7 +1312,6 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr addr
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory:
@ -1551,7 +1542,6 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__a
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -1717,7 +1707,6 @@ define float @global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memo
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -1879,7 +1868,6 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2043,7 +2031,6 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2207,7 +2194,6 @@ define void @global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_mem
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory:
@ -2363,7 +2349,6 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2522,7 +2507,6 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:-2048 scope:SCOPE_DEV
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
@ -2682,7 +2666,6 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_f
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off offset:2044 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -2849,7 +2832,6 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off offset:2044 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
@ -3033,7 +3015,6 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
@ -3206,7 +3187,6 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -3380,7 +3360,6 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -3553,7 +3532,6 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX12-NEXT: s_cbranch_execnz .LBB21_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
@ -3716,7 +3694,6 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB22_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -3882,7 +3859,6 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_g
; GFX12-NEXT: s_cbranch_execnz .LBB23_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -4049,7 +4025,6 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr add
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory:
@ -4297,7 +4272,6 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
@ -4488,7 +4462,6 @@ define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
@ -4535,7 +4508,6 @@ define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
@ -4933,7 +4905,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -4982,7 +4953,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -5394,7 +5364,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -5443,7 +5412,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_gra
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -5851,7 +5819,6 @@ define void @global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(p
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
@ -5897,7 +5864,6 @@ define void @global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(p
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory:
@ -6282,7 +6248,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6330,7 +6295,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB30_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -6728,7 +6692,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -6776,7 +6739,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_g
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -7163,7 +7125,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -7199,7 +7160,6 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -7504,7 +7464,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_n
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -7539,7 +7498,6 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_n
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB33_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -7852,7 +7810,6 @@ define half @global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -7902,7 +7859,6 @@ define half @global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8315,7 +8271,6 @@ define void @global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8364,7 +8319,6 @@ define void @global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -8777,7 +8731,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
@ -8831,7 +8784,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory:
@ -9282,7 +9234,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9339,7 +9290,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -9803,7 +9753,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -9860,7 +9809,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -10322,7 +10270,6 @@ define void @global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
@ -10374,7 +10321,6 @@ define void @global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory:
@ -10811,7 +10757,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -10866,7 +10811,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB40_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -11315,7 +11259,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11370,7 +11313,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -11810,7 +11752,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -11854,7 +11795,6 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
@ -12222,7 +12162,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -12264,7 +12203,6 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB43_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
@ -12637,7 +12575,6 @@ define bfloat @global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -12695,7 +12632,6 @@ define bfloat @global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13160,7 +13096,6 @@ define void @global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13216,7 +13151,6 @@ define void @global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -13648,7 +13582,6 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_me
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory:
@ -13941,7 +13874,6 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -14236,7 +14168,6 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -14534,7 +14465,6 @@ define void @global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory:
@ -14816,7 +14746,6 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine
; GFX12-NEXT: s_cbranch_execnz .LBB50_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15101,7 +15030,6 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine
; GFX12-NEXT: s_cbranch_execnz .LBB51_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -15396,7 +15324,6 @@ define <2 x half> @global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -15693,7 +15620,6 @@ define void @global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fin
; GFX12-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16005,7 +15931,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16057,7 +15982,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory:
@ -16505,7 +16429,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -16557,7 +16480,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -17007,7 +16929,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17059,7 +16980,6 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -17510,7 +17430,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memor
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17561,7 +17480,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memor
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory:
@ -17993,7 +17911,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18044,7 +17961,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB58_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -18479,7 +18395,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18530,7 +18445,6 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB59_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
@ -18977,7 +18891,6 @@ define <2 x bfloat> @global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19030,7 +18943,6 @@ define <2 x bfloat> @global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19480,7 +19392,6 @@ define void @global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fi
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
@ -19532,7 +19443,6 @@ define void @global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fi
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB61_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:

View File

@ -44,7 +44,6 @@ define float @global_agent_atomic_fsub_ret_f32(ptr addrspace(1) %ptr, float %val
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32:
@ -276,7 +275,6 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1) %
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos:
@ -510,7 +508,6 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg(ptr addrspace(1) %
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg:
@ -752,7 +749,6 @@ define void @global_agent_atomic_fsub_noret_f32(ptr addrspace(1) %ptr, float %va
; GFX12-NEXT: s_cbranch_execnz .LBB3_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32:
@ -973,7 +969,6 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1)
; GFX12-NEXT: s_cbranch_execnz .LBB4_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos:
@ -1197,7 +1192,6 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg(ptr addrspace(1)
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg:
@ -1432,7 +1426,6 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1)
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos:
@ -1667,7 +1660,6 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1)
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos:
@ -1899,7 +1891,6 @@ define float @global_agent_atomic_fsub_ret_f32__ftz(ptr addrspace(1) %ptr, float
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__ftz:
@ -2131,7 +2122,6 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspace
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz:
@ -2365,7 +2355,6 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr addrspace
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz:
@ -2607,7 +2596,6 @@ define void @global_agent_atomic_fsub_noret_f32__ftz(ptr addrspace(1) %ptr, floa
; GFX12-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__ftz:
@ -2828,7 +2816,6 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspac
; GFX12-NEXT: s_cbranch_execnz .LBB12_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz:
@ -3052,7 +3039,6 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr addrspac
; GFX12-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz:
@ -3287,7 +3273,6 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspac
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos__ftz:
@ -3522,7 +3507,6 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspa
; GFX12-NEXT: s_cbranch_execnz .LBB15_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos__ftz:
@ -3754,7 +3738,6 @@ define double @global_agent_atomic_fsub_ret_f64(ptr addrspace(1) %ptr, double %v
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f64:
@ -4006,7 +3989,6 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_pos(ptr addrspace(1)
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_pos:
@ -4259,7 +4241,6 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_neg(ptr addrspace(1)
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_neg:
@ -4514,7 +4495,6 @@ define void @global_agent_atomic_fsub_noret_f64(ptr addrspace(1) %ptr, double %v
; GFX12-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f64:
@ -4744,7 +4724,6 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_pos(ptr addrspace(1)
; GFX12-NEXT: s_cbranch_execnz .LBB20_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_pos:
@ -4977,7 +4956,6 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_neg(ptr addrspace(1)
; GFX12-NEXT: s_cbranch_execnz .LBB21_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_neg:
@ -5237,7 +5215,6 @@ define half @global_agent_atomic_fsub_ret_f16(ptr addrspace(1) %ptr, half %val)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_f16:
@ -5282,7 +5259,6 @@ define half @global_agent_atomic_fsub_ret_f16(ptr addrspace(1) %ptr, half %val)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f16:
@ -5660,7 +5636,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %p
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos:
@ -5706,7 +5681,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %p
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos:
@ -6096,7 +6070,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_neg(ptr addrspace(1) %p
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_neg:
@ -6142,7 +6115,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_neg(ptr addrspace(1) %p
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_neg:
@ -6531,7 +6503,6 @@ define void @global_agent_atomic_fsub_noret_f16(ptr addrspace(1) %ptr, half %val
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB25_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_f16:
@ -6574,7 +6545,6 @@ define void @global_agent_atomic_fsub_noret_f16(ptr addrspace(1) %ptr, half %val
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB25_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f16:
@ -6940,7 +6910,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_pos:
@ -6984,7 +6953,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_pos:
@ -7361,7 +7329,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_neg(ptr addrspace(1)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_neg:
@ -7405,7 +7372,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_neg(ptr addrspace(1)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_neg:
@ -7772,7 +7738,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr addrspa
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
@ -7806,7 +7771,6 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr addrspa
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos__align4:
@ -8094,7 +8058,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr addrs
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
@ -8126,7 +8089,6 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr addrs
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b__align4_pos:
@ -8419,7 +8381,6 @@ define half @global_system_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_ret_f16__offset12b_pos:
@ -8466,7 +8427,6 @@ define half @global_system_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_f16__offset12b_pos:
@ -8858,7 +8818,6 @@ define void @global_system_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_noret_f16__offset12b_pos:
@ -8903,7 +8862,6 @@ define void @global_system_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB31_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_f16__offset12b_pos:
@ -9299,7 +9257,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16(ptr addrspace(1) %ptr, bfloat %
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_bf16:
@ -9353,7 +9310,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16(ptr addrspace(1) %ptr, bfloat %
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16:
@ -9802,7 +9758,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos:
@ -9859,7 +9814,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos:
@ -10321,7 +10275,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr addrspace(1)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_neg:
@ -10378,7 +10331,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr addrspace(1)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_neg:
@ -10838,7 +10790,6 @@ define void @global_agent_atomic_fsub_noret_bf16(ptr addrspace(1) %ptr, bfloat %
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_bf16:
@ -10890,7 +10841,6 @@ define void @global_agent_atomic_fsub_noret_bf16(ptr addrspace(1) %ptr, bfloat %
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB35_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16:
@ -11325,7 +11275,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB36_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_pos:
@ -11380,7 +11329,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB36_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_pos:
@ -11827,7 +11775,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr addrspace(1)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB37_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_neg:
@ -11882,7 +11829,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr addrspace(1)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB37_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_neg:
@ -12320,7 +12266,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr addr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
@ -12364,7 +12309,6 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr addr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4:
@ -12730,7 +12674,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr addr
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
@ -12772,7 +12715,6 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr addr
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB39_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos:
@ -13143,7 +13085,6 @@ define bfloat @global_system_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_ret_bf16__offset12b_pos:
@ -13201,7 +13142,6 @@ define bfloat @global_system_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_bf16__offset12b_pos:
@ -13664,7 +13604,6 @@ define void @global_system_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_noret_bf16__offset12b_pos:
@ -13720,7 +13659,6 @@ define void @global_system_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB41_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_bf16__offset12b_pos:
@ -14148,7 +14086,6 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16(ptr addrspace(1) %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16:
@ -14424,7 +14361,6 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrspa
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_pos:
@ -14702,7 +14638,6 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr addrspa
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_neg:
@ -14982,7 +14917,6 @@ define void @global_agent_atomic_fsub_noret_v2f16(ptr addrspace(1) %ptr, <2 x ha
; GFX12-NEXT: s_cbranch_execnz .LBB45_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16:
@ -15245,7 +15179,6 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(1
; GFX12-NEXT: s_cbranch_execnz .LBB46_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_pos:
@ -15511,7 +15444,6 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr addrspace(1
; GFX12-NEXT: s_cbranch_execnz .LBB47_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_neg:
@ -15788,7 +15720,6 @@ define <2 x half> @global_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrsp
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_v2f16__offset12b_pos:
@ -16067,7 +15998,6 @@ define void @global_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(
; GFX12-NEXT: s_cbranch_execnz .LBB49_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_v2f16__offset12b_pos:
@ -16363,7 +16293,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16(ptr addrspace(1) %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_v2bf16:
@ -16415,7 +16344,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16(ptr addrspace(1) %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16:
@ -16863,7 +16791,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr addr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -16915,7 +16842,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr addr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -17365,7 +17291,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr addr
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
@ -17417,7 +17342,6 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr addr
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_neg:
@ -17868,7 +17792,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16(ptr addrspace(1) %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_v2bf16:
@ -17919,7 +17842,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16(ptr addrspace(1) %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB53_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16:
@ -18351,7 +18273,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB54_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -18402,7 +18323,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB54_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -18837,7 +18757,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr addrspace(
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
@ -18888,7 +18807,6 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr addrspace(
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB55_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_neg:
@ -19335,7 +19253,6 @@ define <2 x bfloat> @global_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr add
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -19388,7 +19305,6 @@ define <2 x bfloat> @global_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr add
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_ret_v2bf16__offset12b_pos:
@ -19838,7 +19754,6 @@ define void @global_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: global_system_atomic_fsub_noret_v2bf16__offset12b_pos:
@ -19890,7 +19805,6 @@ define void @global_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB57_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: global_system_atomic_fsub_noret_v2bf16__offset12b_pos:

View File

@ -114,7 +114,6 @@ define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: s_cbranch_execnz .LBB0_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -227,7 +226,6 @@ define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: s_cbranch_execnz .LBB1_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -562,7 +560,6 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -693,7 +690,6 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1042,7 +1038,6 @@ define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1155,7 +1150,6 @@ define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1490,7 +1484,6 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1621,7 +1614,6 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1970,7 +1962,6 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2083,7 +2074,6 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg
; GFX12-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2418,7 +2408,6 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2549,7 +2538,6 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2898,7 +2886,6 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: s_cbranch_execnz .LBB24_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3011,7 +2998,6 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg
; GFX12-NEXT: s_cbranch_execnz .LBB25_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3346,7 +3332,6 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3477,7 +3462,6 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX12-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: v_mov_b32_e32 v1, v4
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset

View File

@ -147,7 +147,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -189,7 +188,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -228,7 +226,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg
; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -295,7 +292,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
@ -356,7 +352,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
@ -523,7 +518,6 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -561,7 +555,6 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -677,7 +670,6 @@ define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -715,7 +707,6 @@ define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -827,7 +818,6 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -865,7 +855,6 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -981,7 +970,6 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1019,7 +1007,6 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1131,7 +1118,6 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1169,7 +1155,6 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1285,7 +1270,6 @@ define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1323,7 +1307,6 @@ define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1435,7 +1418,6 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1473,7 +1455,6 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1589,7 +1570,6 @@ define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i
; GFX12-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1627,7 +1607,6 @@ define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1739,7 +1718,6 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sb
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1777,7 +1755,6 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) in
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1893,7 +1870,6 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -1931,7 +1907,6 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2043,7 +2018,6 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2081,7 +2055,6 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2194,7 +2167,6 @@ define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2229,7 +2201,6 @@ define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2330,7 +2301,6 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2365,7 +2335,6 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2470,7 +2439,6 @@ define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2505,7 +2473,6 @@ define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2606,7 +2573,6 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2641,7 +2607,6 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2746,7 +2711,6 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2781,7 +2745,6 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg
; GFX12-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2882,7 +2845,6 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -2917,7 +2879,6 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3022,7 +2983,6 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase,
; GFX12-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3057,7 +3017,6 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg
; GFX12-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3158,7 +3117,6 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3193,7 +3151,6 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3307,7 +3264,6 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn(ptr addrspace(1) inreg %sba
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3352,7 +3308,6 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn_neg128(ptr addrspace(1) inr
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3487,7 +3442,6 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn(ptr addrspace(1) inre
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3536,7 +3490,6 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn_neg128(ptr addrspace(
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset

View File

@ -3771,7 +3771,6 @@ define amdgpu_ps float @atomic_global_load_saddr_i32(ptr addrspace(1) inreg %sba
; GFX12-NEXT: global_load_b32 v0, v0, s[2:3] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3809,7 +3808,6 @@ define amdgpu_ps float @atomic_global_load_saddr_i32_immneg128(ptr addrspace(1)
; GFX12-NEXT: global_load_b32 v0, v0, s[2:3] offset:-128 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3848,7 +3846,6 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64(ptr addrspace(1) inre
; GFX12-NEXT: global_load_b64 v[0:1], v0, s[2:3] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
@ -3886,7 +3883,6 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64_immneg128(ptr addrspa
; GFX12-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:-128 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset

View File

@ -127,7 +127,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 scope:SCOPE_SE
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst
ret void
@ -284,7 +283,6 @@ define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind {
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%result = atomicrmw nand ptr addrspace(1) %ptr, i32 4 seq_cst
ret i32 %result
@ -872,7 +870,6 @@ define void @flat_atomic_xchg_i32_noret(ptr %ptr, i32 %in) {
; GFX12-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%tmp0 = atomicrmw xchg ptr %ptr, i32 %in seq_cst
ret void

View File

@ -88,7 +88,6 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef r
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0)
@ -103,7 +102,6 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr nou
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0)
@ -118,7 +116,6 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr nou
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0)
@ -179,7 +176,6 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef r
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0)
@ -195,7 +191,6 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr nou
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0)
@ -211,7 +206,6 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr nou
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0)
@ -349,7 +343,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr no
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1)
@ -364,7 +358,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1)
@ -379,7 +373,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1)
@ -440,7 +434,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr no
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1)
@ -456,7 +450,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1)
@ -472,7 +466,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1)

View File

@ -88,7 +88,6 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef r
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0)
@ -103,7 +102,6 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr nou
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0)
@ -118,7 +116,6 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr nou
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0)
@ -179,7 +176,6 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef r
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0)
@ -195,7 +191,6 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr nou
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0)
@ -211,7 +206,6 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr nou
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0)
@ -349,7 +343,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr no
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1)
@ -364,7 +358,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1)
@ -379,7 +373,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1)
@ -440,7 +434,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr no
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1)
@ -456,7 +450,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1)
@ -472,7 +466,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1)

View File

@ -28,7 +28,6 @@ define float @local_atomic_fadd_ret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_add_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f32:
@ -144,7 +143,6 @@ define float @local_atomic_fadd_ret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f32__offset:
@ -261,7 +259,6 @@ define void @local_atomic_fadd_noret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_add_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f32:
@ -375,7 +372,6 @@ define void @local_atomic_fadd_noret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_add_f32 v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f32__offset:
@ -511,7 +507,6 @@ define double @local_atomic_fadd_ret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: s_cbranch_execnz .LBB4_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f64:
@ -703,7 +698,6 @@ define double @local_atomic_fadd_ret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f64__offset:
@ -894,7 +888,6 @@ define void @local_atomic_fadd_noret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: s_cbranch_execnz .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f64:
@ -1077,7 +1070,6 @@ define void @local_atomic_fadd_noret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f64__offset:
@ -1279,7 +1271,6 @@ define half @local_atomic_fadd_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_f16:
@ -1322,7 +1313,6 @@ define half @local_atomic_fadd_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f16:
@ -1664,7 +1654,6 @@ define half @local_atomic_fadd_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_f16__offset:
@ -1709,7 +1698,6 @@ define half @local_atomic_fadd_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f16__offset:
@ -2060,7 +2048,6 @@ define void @local_atomic_fadd_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_f16:
@ -2102,7 +2089,6 @@ define void @local_atomic_fadd_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f16:
@ -2432,7 +2418,6 @@ define void @local_atomic_fadd_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_f16__offset:
@ -2475,7 +2460,6 @@ define void @local_atomic_fadd_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f16__offset:
@ -2806,7 +2790,6 @@ define half @local_atomic_fadd_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_f16__offset__align4:
@ -2840,7 +2823,6 @@ define half @local_atomic_fadd_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f16__offset__align4:
@ -3103,7 +3085,6 @@ define void @local_atomic_fadd_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_f16__offset__align4:
@ -3135,7 +3116,6 @@ define void @local_atomic_fadd_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f16__offset__align4:
@ -3413,7 +3393,6 @@ define bfloat @local_atomic_fadd_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_bf16:
@ -3465,7 +3444,6 @@ define bfloat @local_atomic_fadd_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_bf16:
@ -3869,7 +3847,6 @@ define bfloat @local_atomic_fadd_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_bf16__offset:
@ -3923,7 +3900,6 @@ define bfloat @local_atomic_fadd_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_bf16__offset:
@ -4336,7 +4312,6 @@ define void @local_atomic_fadd_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_bf16:
@ -4387,7 +4362,6 @@ define void @local_atomic_fadd_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_bf16:
@ -4778,7 +4752,6 @@ define void @local_atomic_fadd_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_bf16__offset:
@ -4830,7 +4803,6 @@ define void @local_atomic_fadd_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_bf16__offset:
@ -5222,7 +5194,6 @@ define bfloat @local_atomic_fadd_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_ret_bf16__offset__align4:
@ -5265,7 +5236,6 @@ define bfloat @local_atomic_fadd_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_bf16__offset__align4:
@ -5596,7 +5566,6 @@ define void @local_atomic_fadd_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fadd_noret_bf16__offset__align4:
@ -5637,7 +5606,6 @@ define void @local_atomic_fadd_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_bf16__offset__align4:
@ -5933,7 +5901,6 @@ define <2 x half> @local_atomic_fadd_ret_v2f16(ptr addrspace(3) %ptr, <2 x half>
; GFX12-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_v2f16:
@ -6157,7 +6124,6 @@ define <2 x half> @local_atomic_fadd_ret_v2f16__offset(ptr addrspace(3) %ptr, <2
; GFX12-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_v2f16__offset:
@ -6381,7 +6347,6 @@ define void @local_atomic_fadd_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va
; GFX12-NEXT: ds_pk_add_f16 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_v2f16:
@ -6596,7 +6561,6 @@ define void @local_atomic_fadd_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h
; GFX12-NEXT: ds_pk_add_f16 v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_v2f16__offset:
@ -6817,7 +6781,6 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_v2bf16:
@ -7171,7 +7134,6 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_v2bf16__offset:
@ -7526,7 +7488,6 @@ define void @local_atomic_fadd_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-NEXT: ds_pk_add_bf16 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_v2bf16:
@ -7867,7 +7828,6 @@ define void @local_atomic_fadd_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-NEXT: ds_pk_add_bf16 v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_v2bf16__ofset:
@ -9903,7 +9863,6 @@ define float @local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa
; GFX12-NEXT: ds_add_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode:
@ -10019,7 +9978,6 @@ define void @local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp
; GFX12-NEXT: ds_add_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode:

View File

@ -28,7 +28,6 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f32:
@ -118,7 +117,6 @@ define float @local_atomic_fmax_ret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f32__offset:
@ -210,7 +208,6 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_max_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f32:
@ -300,7 +297,6 @@ define void @local_atomic_fmax_noret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_max_num_f32 v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f32__offset:
@ -397,7 +393,6 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f64:
@ -495,7 +490,6 @@ define double @local_atomic_fmax_ret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2] offset:65528
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f64__offset:
@ -595,7 +589,6 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f64:
@ -693,7 +686,6 @@ define void @local_atomic_fmax_noret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2] offset:65528
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f64__offset:
@ -825,7 +817,6 @@ define half @local_atomic_fmax_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_f16:
@ -869,7 +860,6 @@ define half @local_atomic_fmax_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f16:
@ -1221,7 +1211,6 @@ define half @local_atomic_fmax_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_f16__offset:
@ -1267,7 +1256,6 @@ define half @local_atomic_fmax_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f16__offset:
@ -1628,7 +1616,6 @@ define void @local_atomic_fmax_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_f16:
@ -1671,7 +1658,6 @@ define void @local_atomic_fmax_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f16:
@ -2010,7 +1996,6 @@ define void @local_atomic_fmax_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_f16__offset:
@ -2055,7 +2040,6 @@ define void @local_atomic_fmax_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f16__offset:
@ -2396,7 +2380,6 @@ define half @local_atomic_fmax_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_f16__offset__align4:
@ -2431,7 +2414,6 @@ define half @local_atomic_fmax_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f16__offset__align4:
@ -2703,7 +2685,6 @@ define void @local_atomic_fmax_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_f16__offset__align4:
@ -2737,7 +2718,6 @@ define void @local_atomic_fmax_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f16__offset__align4:
@ -3023,7 +3003,6 @@ define bfloat @local_atomic_fmax_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_bf16:
@ -3075,7 +3054,6 @@ define bfloat @local_atomic_fmax_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_bf16:
@ -3481,7 +3459,6 @@ define bfloat @local_atomic_fmax_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_bf16__offset:
@ -3535,7 +3512,6 @@ define bfloat @local_atomic_fmax_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_bf16__offset:
@ -3950,7 +3926,6 @@ define void @local_atomic_fmax_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_bf16:
@ -4001,7 +3976,6 @@ define void @local_atomic_fmax_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_bf16:
@ -4394,7 +4368,6 @@ define void @local_atomic_fmax_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_bf16__offset:
@ -4446,7 +4419,6 @@ define void @local_atomic_fmax_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_bf16__offset:
@ -4840,7 +4812,6 @@ define bfloat @local_atomic_fmax_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_bf16__offset__align4:
@ -4883,7 +4854,6 @@ define bfloat @local_atomic_fmax_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_bf16__offset__align4:
@ -5216,7 +5186,6 @@ define void @local_atomic_fmax_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_bf16__offset__align4:
@ -5257,7 +5226,6 @@ define void @local_atomic_fmax_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_bf16__offset__align4:
@ -5574,7 +5542,6 @@ define <2 x half> @local_atomic_fmax_ret_v2f16(ptr addrspace(3) %ptr, <2 x half>
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_v2f16:
@ -5846,7 +5813,6 @@ define <2 x half> @local_atomic_fmax_ret_v2f16__offset(ptr addrspace(3) %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_v2f16__offset:
@ -6117,7 +6083,6 @@ define void @local_atomic_fmax_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va
; GFX12-NEXT: s_cbranch_execnz .LBB22_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_v2f16:
@ -6379,7 +6344,6 @@ define void @local_atomic_fmax_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h
; GFX12-NEXT: s_cbranch_execnz .LBB23_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_v2f16__offset:
@ -6668,7 +6632,6 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_v2bf16:
@ -6720,7 +6683,6 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_v2bf16:
@ -7146,7 +7108,6 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_ret_v2bf16__offset:
@ -7198,7 +7159,6 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_v2bf16__offset:
@ -7621,7 +7581,6 @@ define void @local_atomic_fmax_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_v2bf16:
@ -7671,7 +7630,6 @@ define void @local_atomic_fmax_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_v2bf16:
@ -8079,7 +8037,6 @@ define void @local_atomic_fmax_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmax_noret_v2bf16__ofset:
@ -8129,7 +8086,6 @@ define void @local_atomic_fmax_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_v2bf16__ofset:
@ -8509,7 +8465,6 @@ define float @local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode:
@ -8599,7 +8554,6 @@ define void @local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp
; GFX12-NEXT: ds_max_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode:

View File

@ -28,7 +28,6 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f32:
@ -118,7 +117,6 @@ define float @local_atomic_fmin_ret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f32__offset:
@ -210,7 +208,6 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_min_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f32:
@ -300,7 +297,6 @@ define void @local_atomic_fmin_noret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_min_num_f32 v0, v1 offset:65532
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f32__offset:
@ -397,7 +393,6 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_min_num_rtn_f64 v[0:1], v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f64:
@ -495,7 +490,6 @@ define double @local_atomic_fmin_ret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_min_num_rtn_f64 v[0:1], v0, v[1:2] offset:65528
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f64__offset:
@ -595,7 +589,6 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ds_min_num_f64 v0, v[1:2]
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f64:
@ -693,7 +686,6 @@ define void @local_atomic_fmin_noret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ds_min_num_f64 v0, v[1:2] offset:65528
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f64__offset:
@ -825,7 +817,6 @@ define half @local_atomic_fmin_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_f16:
@ -869,7 +860,6 @@ define half @local_atomic_fmin_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f16:
@ -1221,7 +1211,6 @@ define half @local_atomic_fmin_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_f16__offset:
@ -1267,7 +1256,6 @@ define half @local_atomic_fmin_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f16__offset:
@ -1628,7 +1616,6 @@ define void @local_atomic_fmin_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_f16:
@ -1671,7 +1658,6 @@ define void @local_atomic_fmin_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f16:
@ -2010,7 +1996,6 @@ define void @local_atomic_fmin_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_f16__offset:
@ -2055,7 +2040,6 @@ define void @local_atomic_fmin_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f16__offset:
@ -2396,7 +2380,6 @@ define half @local_atomic_fmin_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_f16__offset__align4:
@ -2431,7 +2414,6 @@ define half @local_atomic_fmin_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f16__offset__align4:
@ -2703,7 +2685,6 @@ define void @local_atomic_fmin_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_f16__offset__align4:
@ -2737,7 +2718,6 @@ define void @local_atomic_fmin_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f16__offset__align4:
@ -3023,7 +3003,6 @@ define bfloat @local_atomic_fmin_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_bf16:
@ -3075,7 +3054,6 @@ define bfloat @local_atomic_fmin_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_bf16:
@ -3481,7 +3459,6 @@ define bfloat @local_atomic_fmin_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_bf16__offset:
@ -3535,7 +3512,6 @@ define bfloat @local_atomic_fmin_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_bf16__offset:
@ -3950,7 +3926,6 @@ define void @local_atomic_fmin_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_bf16:
@ -4001,7 +3976,6 @@ define void @local_atomic_fmin_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_bf16:
@ -4394,7 +4368,6 @@ define void @local_atomic_fmin_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_bf16__offset:
@ -4446,7 +4419,6 @@ define void @local_atomic_fmin_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_bf16__offset:
@ -4840,7 +4812,6 @@ define bfloat @local_atomic_fmin_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_bf16__offset__align4:
@ -4883,7 +4854,6 @@ define bfloat @local_atomic_fmin_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_bf16__offset__align4:
@ -5216,7 +5186,6 @@ define void @local_atomic_fmin_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_bf16__offset__align4:
@ -5257,7 +5226,6 @@ define void @local_atomic_fmin_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_bf16__offset__align4:
@ -5574,7 +5542,6 @@ define <2 x half> @local_atomic_fmin_ret_v2f16(ptr addrspace(3) %ptr, <2 x half>
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_v2f16:
@ -5846,7 +5813,6 @@ define <2 x half> @local_atomic_fmin_ret_v2f16__offset(ptr addrspace(3) %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_v2f16__offset:
@ -6117,7 +6083,6 @@ define void @local_atomic_fmin_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va
; GFX12-NEXT: s_cbranch_execnz .LBB22_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_v2f16:
@ -6379,7 +6344,6 @@ define void @local_atomic_fmin_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h
; GFX12-NEXT: s_cbranch_execnz .LBB23_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_v2f16__offset:
@ -6668,7 +6632,6 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_v2bf16:
@ -6720,7 +6683,6 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_v2bf16:
@ -7146,7 +7108,6 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_ret_v2bf16__offset:
@ -7198,7 +7159,6 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_v2bf16__offset:
@ -7621,7 +7581,6 @@ define void @local_atomic_fmin_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_v2bf16:
@ -7671,7 +7630,6 @@ define void @local_atomic_fmin_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_v2bf16:
@ -8079,7 +8037,6 @@ define void @local_atomic_fmin_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fmin_noret_v2bf16__ofset:
@ -8129,7 +8086,6 @@ define void @local_atomic_fmin_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_v2bf16__ofset:
@ -8509,7 +8465,6 @@ define float @local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa
; GFX12-NEXT: ds_min_num_rtn_f32 v0, v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode:
@ -8599,7 +8554,6 @@ define void @local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp
; GFX12-NEXT: ds_min_num_f32 v0, v1
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode:

View File

@ -44,7 +44,6 @@ define float @local_atomic_fsub_ret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f32:
@ -256,7 +255,6 @@ define float @local_atomic_fsub_ret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f32__offset:
@ -467,7 +465,6 @@ define void @local_atomic_fsub_noret_f32(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: s_cbranch_execnz .LBB2_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f32:
@ -668,7 +665,6 @@ define void @local_atomic_fsub_noret_f32__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: s_cbranch_execnz .LBB3_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f32__offset:
@ -877,7 +873,6 @@ define double @local_atomic_fsub_ret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: s_cbranch_execnz .LBB4_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f64:
@ -1094,7 +1089,6 @@ define double @local_atomic_fsub_ret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: s_cbranch_execnz .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f64__offset:
@ -1310,7 +1304,6 @@ define void @local_atomic_fsub_noret_f64(ptr addrspace(3) %ptr) nounwind {
; GFX12-NEXT: s_cbranch_execnz .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f64:
@ -1516,7 +1509,6 @@ define void @local_atomic_fsub_noret_f64__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-NEXT: s_cbranch_execnz .LBB7_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f64__offset:
@ -1741,7 +1733,6 @@ define half @local_atomic_fsub_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_f16:
@ -1784,7 +1775,6 @@ define half @local_atomic_fsub_ret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f16:
@ -2126,7 +2116,6 @@ define half @local_atomic_fsub_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_f16__offset:
@ -2171,7 +2160,6 @@ define half @local_atomic_fsub_ret_f16__offset(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f16__offset:
@ -2522,7 +2510,6 @@ define void @local_atomic_fsub_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_f16:
@ -2564,7 +2551,6 @@ define void @local_atomic_fsub_noret_f16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB10_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f16:
@ -2894,7 +2880,6 @@ define void @local_atomic_fsub_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_f16__offset:
@ -2937,7 +2922,6 @@ define void @local_atomic_fsub_noret_f16__offset(ptr addrspace(3) %ptr) nounwind
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB11_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f16__offset:
@ -3268,7 +3252,6 @@ define half @local_atomic_fsub_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_f16__offset__align4:
@ -3302,7 +3285,6 @@ define half @local_atomic_fsub_ret_f16__offset__align4(ptr addrspace(3) %ptr) no
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f16__offset__align4:
@ -3565,7 +3547,6 @@ define void @local_atomic_fsub_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_f16__offset__align4:
@ -3597,7 +3578,6 @@ define void @local_atomic_fsub_noret_f16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB13_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f16__offset__align4:
@ -3875,7 +3855,6 @@ define bfloat @local_atomic_fsub_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_bf16:
@ -3927,7 +3906,6 @@ define bfloat @local_atomic_fsub_ret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v0, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_bf16:
@ -4331,7 +4309,6 @@ define bfloat @local_atomic_fsub_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_bf16__offset:
@ -4385,7 +4362,6 @@ define bfloat @local_atomic_fsub_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v1, v3
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_bf16__offset:
@ -4798,7 +4774,6 @@ define void @local_atomic_fsub_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_bf16:
@ -4849,7 +4824,6 @@ define void @local_atomic_fsub_noret_bf16(ptr addrspace(3) %ptr) nounwind {
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB16_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_bf16:
@ -5240,7 +5214,6 @@ define void @local_atomic_fsub_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_bf16__offset:
@ -5292,7 +5265,6 @@ define void @local_atomic_fsub_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB17_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_bf16__offset:
@ -5684,7 +5656,6 @@ define bfloat @local_atomic_fsub_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_bf16__offset__align4:
@ -5727,7 +5698,6 @@ define bfloat @local_atomic_fsub_ret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_bf16__offset__align4:
@ -6058,7 +6028,6 @@ define void @local_atomic_fsub_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_bf16__offset__align4:
@ -6099,7 +6068,6 @@ define void @local_atomic_fsub_noret_bf16__offset__align4(ptr addrspace(3) %ptr)
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB19_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_bf16__offset__align4:
@ -6412,7 +6380,6 @@ define <2 x half> @local_atomic_fsub_ret_v2f16(ptr addrspace(3) %ptr, <2 x half>
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_v2f16:
@ -6667,7 +6634,6 @@ define <2 x half> @local_atomic_fsub_ret_v2f16__offset(ptr addrspace(3) %ptr, <2
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_v2f16__offset:
@ -6920,7 +6886,6 @@ define void @local_atomic_fsub_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va
; GFX12-NEXT: s_cbranch_execnz .LBB22_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_v2f16:
@ -7163,7 +7128,6 @@ define void @local_atomic_fsub_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h
; GFX12-NEXT: s_cbranch_execnz .LBB23_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_v2f16__offset:
@ -7436,7 +7400,6 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_v2bf16:
@ -7488,7 +7451,6 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_v2bf16:
@ -7914,7 +7876,6 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_ret_v2bf16__offset:
@ -7966,7 +7927,6 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16__offset(ptr addrspace(3) %ptr,
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_v2bf16__offset:
@ -8389,7 +8349,6 @@ define void @local_atomic_fsub_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_v2bf16:
@ -8439,7 +8398,6 @@ define void @local_atomic_fsub_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat>
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB26_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_v2bf16:
@ -8847,7 +8805,6 @@ define void @local_atomic_fsub_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-TRUE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: local_atomic_fsub_noret_v2bf16__ofset:
@ -8897,7 +8854,6 @@ define void @local_atomic_fsub_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b
; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB27_1
; GFX12-FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_v2bf16__ofset:
@ -9293,7 +9249,6 @@ define float @local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: v_mov_b32_e32 v0, v1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode:
@ -9503,7 +9458,6 @@ define void @local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp
; GFX12-NEXT: s_cbranch_execnz .LBB29_1
; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode:

View File

@ -0,0 +1,115 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
# Test that we can optimize away s_wait_loadcnt at function boundaries when
# the only pending LOAD_CNT events are from GLOBAL_INV (which doesn't write
# to VGPRs).
#
# When a function contains only GLOBAL_INV with no actual VMEM loads pending
# to VGPRs, we should not need to emit s_wait_loadcnt 0 before the return.
---
# Test 1: Only GLOBAL_INV, no VGPR loads - should NOT need S_WAIT_LOADCNT
# before return because GLOBAL_INV doesn't write to VGPRs.
#
# Input: a single block containing GLOBAL_INV followed by the return.
# Expected output: the five waits checked ahead of GLOBAL_INV are not in the
# input, so they are inserted by the pass (presumably the function-entry waits
# for a non-entry function - confirm). After them, the return must be the very
# next instruction after GLOBAL_INV, i.e. no loadcnt wait is placed between
# the two.
# NOTE(review): the "-NEXT" check on the return already forbids any line
# between GLOBAL_INV and the return, so the negative check on S_WAIT_LOADCNT
# looks redundant - confirm before removing it.
name: func_global_inv_only
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: false
body: |
bb.0:
liveins: $sgpr30_sgpr31
; GFX12-LABEL: name: func_global_inv_only
; GFX12: liveins: $sgpr30_sgpr31
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GFX12-NEXT: S_WAIT_EXPCNT 0
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
; GFX12-NEXT: S_WAIT_BVHCNT 0
; GFX12-NEXT: S_WAIT_KMCNT 0
; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
; GFX12-NOT: S_WAIT_LOADCNT
; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
GLOBAL_INV 16, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31
...
---
# Test 2: GLOBAL_INV with actual VGPR load - MUST wait for loadcnt
#
# Input: a global load into $vgpr0, then GLOBAL_INV, then a return that lists
# $vgpr0 as an implicit operand. Nothing between the load and the return reads
# $vgpr0, so the load result is still outstanding when the return is reached.
# Expected output: S_WAIT_LOADCNT 0 sits after GLOBAL_INV and directly before
# the return. This guards against the optimization dropping a wait that is
# still needed for a real VGPR load.
name: func_global_inv_with_vgpr_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: false
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-LABEL: name: func_global_inv_with_vgpr_load
; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GFX12-NEXT: S_WAIT_EXPCNT 0
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
; GFX12-NEXT: S_WAIT_BVHCNT 0
; GFX12-NEXT: S_WAIT_KMCNT 0
; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
; GFX12-NEXT: S_WAIT_LOADCNT 0
; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
GLOBAL_INV 16, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 3: Only VGPR load (no GLOBAL_INV) - MUST wait for loadcnt
#
# Baseline case without any GLOBAL_INV: a global load into $vgpr0 followed
# immediately by a return that lists $vgpr0 as an implicit operand.
# Expected output: S_WAIT_LOADCNT 0 is placed between the load and the return,
# showing that the wait before returning is kept for an ordinary VGPR load.
name: func_vgpr_load_no_global_inv
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: false
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-LABEL: name: func_vgpr_load_no_global_inv
; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GFX12-NEXT: S_WAIT_EXPCNT 0
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
; GFX12-NEXT: S_WAIT_BVHCNT 0
; GFX12-NEXT: S_WAIT_KMCNT 0
; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: S_WAIT_LOADCNT 0
; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 4: GLOBAL_INV with load already waited on - should NOT need S_WAIT_LOADCNT at return
# The load was waited on when $vgpr0 was used, so only GLOBAL_INV is pending at return.
#
# Input: a global load into $vgpr0, a V_MOV_B32 that copies $vgpr0 to $vgpr1,
# then GLOBAL_INV, then a return that lists both registers as implicit operands.
# Expected output: S_WAIT_LOADCNT 0 appears once, between the load and the
# V_MOV_B32 that reads $vgpr0. GLOBAL_INV is issued after that wait, and the
# return must be the very next instruction after GLOBAL_INV, with no second
# loadcnt wait.
# NOTE(review): the "-NEXT" check on the return already forbids any line
# between GLOBAL_INV and the return, so the negative check on S_WAIT_LOADCNT
# looks redundant - confirm before removing it.
name: func_global_inv_load_already_waited
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: false
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-LABEL: name: func_global_inv_load_already_waited
; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GFX12-NEXT: S_WAIT_EXPCNT 0
; GFX12-NEXT: S_WAIT_SAMPLECNT 0
; GFX12-NEXT: S_WAIT_BVHCNT 0
; GFX12-NEXT: S_WAIT_KMCNT 0
; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: S_WAIT_LOADCNT 0
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
; GFX12-NOT: S_WAIT_LOADCNT
; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
GLOBAL_INV 16, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...