[AMDGPU] Fix missing waitcnt after buffer_wbl2 (#178316)
On GFX9, BUFFER_WBL2 is used to write back dirty cache lines and requires an s_waitcnt vmcnt(0) afterwards to ensure completion. This patch fixes the missing wait by incrementing vmcnt for the buffer_wbl2 instruction. Co-authored-by: Jay Foad <jay.foad@gmail.com>
This commit is contained in:
parent
63918f51aa
commit
2dcd75eb44
@ -2783,7 +2783,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
|
||||
if (!SIInstrInfo::isLDSDMA(Inst) && FlatASCount > 1)
|
||||
ScoreBrackets->setPendingFlat();
|
||||
} else if (SIInstrInfo::isVMEM(Inst) &&
|
||||
!llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())) {
|
||||
(!AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode()) ||
|
||||
Inst.getOpcode() == AMDGPU::BUFFER_WBL2)) {
|
||||
// BUFFER_WBL2 is included here because, unlike invalidates, it has to be
|
||||
// followed by "S_WAITCNT vmcnt(0)" to ensure that the writeback has
|
||||
// completed.
|
||||
IsVMEMAccess = true;
|
||||
ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
|
||||
|
||||
|
||||
@ -333,6 +333,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -478,6 +479,7 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -630,6 +632,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -785,6 +788,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -924,6 +928,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1065,6 +1070,7 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1216,6 +1222,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1369,6 +1376,7 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1513,6 +1521,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v0, v3
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1667,6 +1676,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v0, v3
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1831,6 +1841,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1997,6 +2008,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -333,6 +333,7 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -478,6 +479,7 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -630,6 +632,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -785,6 +788,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -924,6 +928,7 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v3, v5, v5
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1065,6 +1070,7 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1216,6 +1222,7 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1369,6 +1376,7 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1513,6 +1521,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_m
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v0, v3
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1667,6 +1676,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: v_min_f32_e32 v0, v0, v3
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1831,6 +1841,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1997,6 +2008,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -9,6 +9,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -25,6 +26,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -39,6 +41,7 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -76,6 +79,7 @@ define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -90,6 +94,7 @@ define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr,
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -104,6 +109,7 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -118,6 +124,7 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -1490,7 +1490,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
|
||||
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1513,7 +1513,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
|
||||
; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1587,7 +1587,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
|
||||
; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1640,7 +1640,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
|
||||
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1663,7 +1663,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
|
||||
; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1737,7 +1737,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
|
||||
; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1781,6 +1781,7 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1792,6 +1793,7 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1831,6 +1833,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1861,6 +1864,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1872,6 +1876,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1932,7 +1937,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
|
||||
; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1978,6 +1983,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1991,6 +1997,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2035,6 +2042,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2068,6 +2076,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2081,6 +2090,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2112,6 +2122,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2123,6 +2134,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2162,6 +2174,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2192,6 +2205,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2204,6 +2218,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2248,6 +2263,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -16,6 +16,7 @@ define void @flat_atomic_cmpxchg_i32_ret_av_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -44,6 +45,7 @@ define void @flat_atomic_cmpxchg_i32_ret_av_av__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -72,6 +74,7 @@ define void @flat_atomic_cmpxchg_i32_ret_av_av__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -103,6 +106,7 @@ define void @flat_atomic_cmpxchg_i32_ret_a_a__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -134,6 +138,7 @@ define void @flat_atomic_cmpxchg_i32_ret_a_a__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -165,6 +170,7 @@ define void @flat_atomic_cmpxchg_i32_ret_v_a__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -194,6 +200,7 @@ define void @flat_atomic_cmpxchg_i32_ret_a_v__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -222,6 +229,7 @@ define void @flat_atomic_cmpxchg_i32_ret_v_v__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -251,6 +259,7 @@ define void @flat_atomic_cmpxchg_i32_ret_av_v__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -279,6 +288,7 @@ define void @flat_atomic_cmpxchg_i32_ret_v_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -309,6 +319,7 @@ define void @flat_atomic_cmpxchg_i32_ret_av_a__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -339,6 +350,7 @@ define void @flat_atomic_cmpxchg_i32_ret_a_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -380,6 +392,7 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB12_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[6:7], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -436,6 +449,7 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB13_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[6:7], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -492,6 +506,7 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB14_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -561,6 +576,7 @@ define void @flat_atomic_cmpxchg_i64_ret_a_a__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v4, a2
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v5, a3
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -631,6 +647,7 @@ define void @flat_atomic_cmpxchg_i64_ret_a_a__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v4, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v5, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -692,6 +709,7 @@ define void @flat_atomic_cmpxchg_i64_ret_v_a__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -753,6 +771,7 @@ define void @flat_atomic_cmpxchg_i64_ret_a_v__v(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -810,6 +829,7 @@ define void @flat_atomic_cmpxchg_i64_ret_v_v__a(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB19_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -871,6 +891,7 @@ define void @flat_atomic_cmpxchg_i64_ret_av_v__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB20_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[6:7], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -927,6 +948,7 @@ define void @flat_atomic_cmpxchg_i64_ret_v_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB21_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[6:7], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -987,6 +1009,7 @@ define void @flat_atomic_cmpxchg_i64_ret_av_a__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -1048,6 +1071,7 @@ define void @flat_atomic_cmpxchg_i64_ret_a_av__av(ptr %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[0:3] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
|
||||
@ -16,6 +16,7 @@ define void @flat_atomic_xchg_i32_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -35,6 +36,7 @@ define void @flat_atomic_xchg_i32_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -60,6 +62,7 @@ define void @flat_atomic_xchg_i32_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -78,6 +81,7 @@ define void @flat_atomic_xchg_i32_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -101,6 +105,7 @@ define void @flat_atomic_xchg_i32_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -118,6 +123,7 @@ define void @flat_atomic_xchg_i32_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -142,6 +148,7 @@ define void @flat_atomic_xchg_i32_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -158,6 +165,7 @@ define void @flat_atomic_xchg_i32_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -181,6 +189,7 @@ define void @flat_atomic_xchg_i32_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -197,6 +206,7 @@ define void @flat_atomic_xchg_i32_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -220,6 +230,7 @@ define void @flat_atomic_xchg_i32_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -237,6 +248,7 @@ define void @flat_atomic_xchg_i32_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -262,6 +274,7 @@ define void @flat_atomic_xchg_i32_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -280,6 +293,7 @@ define void @flat_atomic_xchg_i32_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -303,6 +317,7 @@ define void @flat_atomic_xchg_i32_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -319,6 +334,7 @@ define void @flat_atomic_xchg_i32_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -577,6 +593,7 @@ define void @flat_atomic_xchg_i32_noret_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def a0
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v[0:1], a0 offset:40
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -590,6 +607,7 @@ define void @flat_atomic_xchg_i32_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a0
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v[0:1], a0 offset:40 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -608,6 +626,7 @@ define void @flat_atomic_xchg_i32_noret_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v[0:1], v2 offset:40
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -621,6 +640,7 @@ define void @flat_atomic_xchg_i32_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap v[0:1], v2 offset:40 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -655,6 +675,7 @@ define void @flat_atomic_xchg_i64_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB11_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -701,6 +722,7 @@ define void @flat_atomic_xchg_i64_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB11_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -753,6 +775,7 @@ define void @flat_atomic_xchg_i64_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB12_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -797,6 +820,7 @@ define void @flat_atomic_xchg_i64_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB12_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -845,6 +869,7 @@ define void @flat_atomic_xchg_i64_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -889,6 +914,7 @@ define void @flat_atomic_xchg_i64_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB13_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -939,6 +965,7 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB14_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -981,6 +1008,7 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB14_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1029,6 +1057,7 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB15_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1071,6 +1100,7 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB15_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1119,6 +1149,7 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB16_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1163,6 +1194,7 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB16_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1215,6 +1247,7 @@ define void @flat_atomic_xchg_i64_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB17_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1259,6 +1292,7 @@ define void @flat_atomic_xchg_i64_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB17_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1307,6 +1341,7 @@ define void @flat_atomic_xchg_i64_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB18_2
|
||||
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1349,6 +1384,7 @@ define void @flat_atomic_xchg_i64_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB18_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1399,6 +1435,7 @@ define void @flat_atomic_xchg_i64_noret_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX90A-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], a[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1435,6 +1472,7 @@ define void @flat_atomic_xchg_i64_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX950-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], a[0:1] sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1476,6 +1514,7 @@ define void @flat_atomic_xchg_i64_noret_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX90A-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1511,6 +1550,7 @@ define void @flat_atomic_xchg_i64_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX950-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1551,6 +1591,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1582,6 +1623,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1620,6 +1662,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1650,6 +1693,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1686,6 +1730,7 @@ define void @flat_atomic_xor_expansion_i32_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1716,6 +1761,7 @@ define void @flat_atomic_xor_expansion_i32_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1753,6 +1799,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1782,6 +1829,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1818,6 +1866,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1847,6 +1896,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1883,6 +1933,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1913,6 +1964,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1951,6 +2003,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1981,6 +2034,7 @@ define void @flat_atomic_xor_expansion_i32_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2017,6 +2071,7 @@ define void @flat_atomic_xor_expansion_i32_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2046,6 +2101,7 @@ define void @flat_atomic_xor_expansion_i32_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2140,6 +2196,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_av_no_agprs(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v0, v1, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2284,6 +2341,7 @@ define void @flat_atomic_xor_expansion_i32_ret_av_av_no_agprs(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v0, v1, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2380,6 +2438,7 @@ define void @flat_atomic_xor_expansion_i32_noret_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2407,6 +2466,7 @@ define void @flat_atomic_xor_expansion_i32_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2438,6 +2498,7 @@ define void @flat_atomic_xor_expansion_i32_noret_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2464,6 +2525,7 @@ define void @flat_atomic_xor_expansion_i32_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2510,6 +2572,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2572,6 +2635,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2639,6 +2703,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2697,6 +2762,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2758,6 +2824,7 @@ define void @flat_atomic_xor_expansion_i64_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2818,6 +2885,7 @@ define void @flat_atomic_xor_expansion_i64_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2883,6 +2951,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2939,6 +3008,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3000,6 +3070,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3056,6 +3127,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3117,6 +3189,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3177,6 +3250,7 @@ define void @flat_atomic_xor_expansion_i64_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3244,6 +3318,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3302,6 +3377,7 @@ define void @flat_atomic_xor_expansion_i64_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3363,6 +3439,7 @@ define void @flat_atomic_xor_expansion_i64_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3419,6 +3496,7 @@ define void @flat_atomic_xor_expansion_i64_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3486,6 +3564,7 @@ define void @flat_atomic_xor_expansion_i64_noret_a(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3544,6 +3623,7 @@ define void @flat_atomic_xor_expansion_i64_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3603,6 +3683,7 @@ define void @flat_atomic_xor_expansion_i64_noret_av(ptr %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3659,6 +3740,7 @@ define void @flat_atomic_xor_expansion_i64_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -3722,6 +3804,7 @@ define void @flat_atomic_xor_i32_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3763,6 +3846,7 @@ define void @flat_atomic_xor_i32_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3801,6 +3885,7 @@ define void @flat_atomic_xor_i32_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3839,6 +3924,7 @@ define void @flat_atomic_xor_i32_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3876,6 +3962,7 @@ define void @flat_atomic_xor_i32_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3914,6 +4001,7 @@ define void @flat_atomic_xor_i32_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3955,6 +4043,7 @@ define void @flat_atomic_xor_i32_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3992,6 +4081,7 @@ define void @flat_atomic_xor_i32_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4259,6 +4349,7 @@ define void @flat_atomic_xor_i32_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a0
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v[0:1], a0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4288,6 +4379,7 @@ define void @flat_atomic_xor_i32_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor v[0:1], v2
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4366,6 +4458,7 @@ define void @flat_atomic_xor_i64_ret_a_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB53_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4461,6 +4554,7 @@ define void @flat_atomic_xor_i64_ret_a_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB54_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1], v[4:5] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4552,6 +4646,7 @@ define void @flat_atomic_xor_i64_ret_v_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB55_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4643,6 +4738,7 @@ define void @flat_atomic_xor_i64_ret_av_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB56_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1], v[4:5] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4730,6 +4826,7 @@ define void @flat_atomic_xor_i64_ret_av_v(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB57_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1], v[4:5] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4821,6 +4918,7 @@ define void @flat_atomic_xor_i64_ret_av_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB58_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -4916,6 +5014,7 @@ define void @flat_atomic_xor_i64_ret_a_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB59_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1], v[4:5] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -5003,6 +5102,7 @@ define void @flat_atomic_xor_i64_ret_v_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB60_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1], v[4:5] sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -5098,6 +5198,7 @@ define void @flat_atomic_xor_i64_noret_a(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX950-NEXT: .LBB61_3: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[0:1], a[0:1]
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -5183,6 +5284,7 @@ define void @flat_atomic_xor_i64_noret_av(ptr %ptr) #0 {
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX950-NEXT: .LBB62_3: ; %atomicrmw.global
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3]
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
|
||||
@ -16,6 +16,7 @@ define void @global_atomic_cmpxchg_i32_ret_av_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -44,6 +45,7 @@ define void @global_atomic_cmpxchg_i32_ret_av_av__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -72,6 +74,7 @@ define void @global_atomic_cmpxchg_i32_ret_av_av__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -103,6 +106,7 @@ define void @global_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -134,6 +138,7 @@ define void @global_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -165,6 +170,7 @@ define void @global_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -194,6 +200,7 @@ define void @global_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -222,6 +229,7 @@ define void @global_atomic_cmpxchg_i32_ret_v_v__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -251,6 +259,7 @@ define void @global_atomic_cmpxchg_i32_ret_av_v__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -279,6 +288,7 @@ define void @global_atomic_cmpxchg_i32_ret_v_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -309,6 +319,7 @@ define void @global_atomic_cmpxchg_i32_ret_av_a__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -339,6 +350,7 @@ define void @global_atomic_cmpxchg_i32_ret_a_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap v0, v[0:1], v[2:3], off offset:40 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -371,6 +383,7 @@ define void @global_atomic_cmpxchg_i64_ret_av_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -399,6 +412,7 @@ define void @global_atomic_cmpxchg_i64_ret_av_av__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -427,6 +441,7 @@ define void @global_atomic_cmpxchg_i64_ret_av_av__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -461,6 +476,7 @@ define void @global_atomic_cmpxchg_i64_ret_a_a__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v4, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v5, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -495,6 +511,7 @@ define void @global_atomic_cmpxchg_i64_ret_a_a__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v4, a0
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v5, a1
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -525,6 +542,7 @@ define void @global_atomic_cmpxchg_i64_ret_v_a__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[4:5]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -555,6 +573,7 @@ define void @global_atomic_cmpxchg_i64_ret_a_v__v(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -583,6 +602,7 @@ define void @global_atomic_cmpxchg_i64_ret_v_v__a(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -613,6 +633,7 @@ define void @global_atomic_cmpxchg_i64_ret_av_v__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -641,6 +662,7 @@ define void @global_atomic_cmpxchg_i64_ret_v_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -671,6 +693,7 @@ define void @global_atomic_cmpxchg_i64_ret_av_a__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[4:5]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
@ -701,6 +724,7 @@ define void @global_atomic_cmpxchg_i64_ret_a_av__av(ptr addrspace(1) %ptr) #0 {
|
||||
; CHECK-NEXT: ; def v[2:3]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_wbl2
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: global_atomic_cmpswap_x2 v[0:1], v[0:1], v[2:5], off offset:80 glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_invl2
|
||||
|
||||
@ -16,6 +16,7 @@ define void @global_atomic_xchg_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -35,6 +36,7 @@ define void @global_atomic_xchg_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -60,6 +62,7 @@ define void @global_atomic_xchg_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -78,6 +81,7 @@ define void @global_atomic_xchg_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -101,6 +105,7 @@ define void @global_atomic_xchg_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -118,6 +123,7 @@ define void @global_atomic_xchg_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -142,6 +148,7 @@ define void @global_atomic_xchg_i32_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -158,6 +165,7 @@ define void @global_atomic_xchg_i32_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -181,6 +189,7 @@ define void @global_atomic_xchg_i32_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -197,6 +206,7 @@ define void @global_atomic_xchg_i32_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -220,6 +230,7 @@ define void @global_atomic_xchg_i32_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -237,6 +248,7 @@ define void @global_atomic_xchg_i32_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -262,6 +274,7 @@ define void @global_atomic_xchg_i32_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -280,6 +293,7 @@ define void @global_atomic_xchg_i32_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -303,6 +317,7 @@ define void @global_atomic_xchg_i32_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -319,6 +334,7 @@ define void @global_atomic_xchg_i32_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v0, v[0:1], v2, off offset:40 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -577,6 +593,7 @@ define void @global_atomic_xchg_i32_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def a0
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v[0:1], a0, off offset:40
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -590,6 +607,7 @@ define void @global_atomic_xchg_i32_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a0
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v[0:1], a0, off offset:40 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -608,6 +626,7 @@ define void @global_atomic_xchg_i32_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v2
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap v[0:1], v2, off offset:40
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -621,6 +640,7 @@ define void @global_atomic_xchg_i32_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap v[0:1], v2, off offset:40 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -646,6 +666,7 @@ define void @global_atomic_xchg_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -667,6 +688,7 @@ define void @global_atomic_xchg_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -694,6 +716,7 @@ define void @global_atomic_xchg_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -713,6 +736,7 @@ define void @global_atomic_xchg_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -736,6 +760,7 @@ define void @global_atomic_xchg_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -754,6 +779,7 @@ define void @global_atomic_xchg_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -779,6 +805,7 @@ define void @global_atomic_xchg_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -795,6 +822,7 @@ define void @global_atomic_xchg_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -818,6 +846,7 @@ define void @global_atomic_xchg_i64_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -834,6 +863,7 @@ define void @global_atomic_xchg_i64_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -857,6 +887,7 @@ define void @global_atomic_xchg_i64_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -875,6 +906,7 @@ define void @global_atomic_xchg_i64_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -902,6 +934,7 @@ define void @global_atomic_xchg_i64_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -921,6 +954,7 @@ define void @global_atomic_xchg_i64_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -944,6 +978,7 @@ define void @global_atomic_xchg_i64_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -960,6 +995,7 @@ define void @global_atomic_xchg_i64_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off offset:80 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -982,6 +1018,7 @@ define void @global_atomic_xchg_i64_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def a[0:1]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], a[0:1], off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -995,6 +1032,7 @@ define void @global_atomic_xchg_i64_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a[0:1]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], a[0:1], off sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1013,6 +1051,7 @@ define void @global_atomic_xchg_i64_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: ; def v[2:3]
|
||||
; GFX90A-NEXT: ;;#ASMEND
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_swap_x2 v[0:1], v[2:3], off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1026,6 +1065,7 @@ define void @global_atomic_xchg_i64_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[0:1], v[2:3], off sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1056,6 +1096,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1087,6 +1128,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1125,6 +1167,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1155,6 +1198,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1191,6 +1235,7 @@ define void @global_atomic_xor_expansion_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1221,6 +1266,7 @@ define void @global_atomic_xor_expansion_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1258,6 +1304,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1287,6 +1334,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1323,6 +1371,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_v(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1352,6 +1401,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_v(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1388,6 +1438,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_a(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1418,6 +1469,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_a(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1456,6 +1508,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1486,6 +1539,7 @@ define void @global_atomic_xor_expansion_i32_ret_a_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1522,6 +1576,7 @@ define void @global_atomic_xor_expansion_i32_ret_v_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1551,6 +1606,7 @@ define void @global_atomic_xor_expansion_i32_ret_v_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1645,6 +1701,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_av_no_agprs(ptr addrspace(1)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v0, v1, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1789,6 +1846,7 @@ define void @global_atomic_xor_expansion_i32_ret_av_av_no_agprs(ptr addrspace(1)
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v0, v1, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v0, v[2:3], v[0:1], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1885,6 +1943,7 @@ define void @global_atomic_xor_expansion_i32_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1912,6 +1971,7 @@ define void @global_atomic_xor_expansion_i32_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -1943,6 +2003,7 @@ define void @global_atomic_xor_expansion_i32_noret_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1969,6 +2030,7 @@ define void @global_atomic_xor_expansion_i32_noret_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v3, v4
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2008,6 +2070,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2042,6 +2105,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2083,6 +2147,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2115,6 +2180,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2152,6 +2218,7 @@ define void @global_atomic_xor_expansion_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2184,6 +2251,7 @@ define void @global_atomic_xor_expansion_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2223,6 +2291,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2253,6 +2322,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2290,6 +2360,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_v(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2320,6 +2391,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_v(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2357,6 +2429,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_a(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2389,6 +2462,7 @@ define void @global_atomic_xor_expansion_i64_ret_av_a(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2430,6 +2504,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2462,6 +2537,7 @@ define void @global_atomic_xor_expansion_i64_ret_a_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2499,6 +2575,7 @@ define void @global_atomic_xor_expansion_i64_ret_v_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2529,6 +2606,7 @@ define void @global_atomic_xor_expansion_i64_ret_v_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2567,6 +2645,7 @@ define void @global_atomic_xor_expansion_i64_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2596,6 +2675,7 @@ define void @global_atomic_xor_expansion_i64_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2628,6 +2708,7 @@ define void @global_atomic_xor_expansion_i64_noret_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX90A-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2655,6 +2736,7 @@ define void @global_atomic_xor_expansion_i64_noret_av(ptr addrspace(1) %ptr) #0
|
||||
; GFX950-NEXT: v_xor_b32_e32 v3, v5, v7
|
||||
; GFX950-NEXT: v_xor_b32_e32 v2, v4, v6
|
||||
; GFX950-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off sc0 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc0 sc1
|
||||
@ -2703,6 +2785,7 @@ define void @global_atomic_xor_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2744,6 +2827,7 @@ define void @global_atomic_xor_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2782,6 +2866,7 @@ define void @global_atomic_xor_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2820,6 +2905,7 @@ define void @global_atomic_xor_i32_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2857,6 +2943,7 @@ define void @global_atomic_xor_i32_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2895,6 +2982,7 @@ define void @global_atomic_xor_i32_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2936,6 +3024,7 @@ define void @global_atomic_xor_i32_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -2973,6 +3062,7 @@ define void @global_atomic_xor_i32_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v0, v[0:1], v2, off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3240,6 +3330,7 @@ define void @global_atomic_xor_i32_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a0
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v[0:1], a0, off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3269,6 +3360,7 @@ define void @global_atomic_xor_i32_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v2
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor v[0:1], v2, off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3313,6 +3405,7 @@ define void @global_atomic_xor_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3357,6 +3450,7 @@ define void @global_atomic_xor_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3396,6 +3490,7 @@ define void @global_atomic_xor_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3435,6 +3530,7 @@ define void @global_atomic_xor_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3472,6 +3568,7 @@ define void @global_atomic_xor_i64_ret_av_v(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3511,6 +3608,7 @@ define void @global_atomic_xor_i64_ret_av_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3555,6 +3653,7 @@ define void @global_atomic_xor_i64_ret_a_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX950-NEXT: v_accvgpr_read_b32 v2, a0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3592,6 +3691,7 @@ define void @global_atomic_xor_i64_ret_v_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3625,6 +3725,7 @@ define void @global_atomic_xor_i64_noret_a(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def a[0:1]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], a[0:1], off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
@ -3654,6 +3755,7 @@ define void @global_atomic_xor_i64_noret_av(ptr addrspace(1) %ptr) #0 {
|
||||
; GFX950-NEXT: ; def v[2:3]
|
||||
; GFX950-NEXT: ;;#ASMEND
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_xor_x2 v[0:1], v[2:3], off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: buffer_inv sc1
|
||||
|
||||
@ -46,6 +46,7 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB0_3
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -84,6 +85,7 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
|
||||
@ -35,6 +35,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -219,6 +220,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -402,6 +404,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
|
||||
@ -790,6 +793,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -990,6 +994,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1183,6 +1188,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset(ptr addrspace(7)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1400,6 +1406,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1617,6 +1624,7 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1857,6 +1865,7 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2098,6 +2107,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2369,6 +2379,7 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, v5
|
||||
; GFX942-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
|
||||
@ -2847,6 +2858,7 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remot
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3107,6 +3119,7 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3431,6 +3444,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s7, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3862,6 +3876,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s7, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4371,6 +4386,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
@ -5103,6 +5119,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f32_e32 v0, v0, v5
|
||||
; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0
|
||||
@ -5620,6 +5637,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f32_e32 v0, v0, v3
|
||||
; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
||||
@ -6220,7 +6238,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd
|
||||
; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4
|
||||
@ -6896,6 +6914,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7130,6 +7149,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7360,6 +7380,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
|
||||
@ -7817,6 +7838,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8067,6 +8089,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace(
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8307,6 +8330,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8557,6 +8581,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8825,6 +8850,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9
|
||||
@ -9261,6 +9287,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -9707,6 +9734,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc
|
||||
; GFX942-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
|
||||
; GFX942-NEXT: v_add_f32_e32 v7, v7, v5
|
||||
@ -10416,6 +10444,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset(ptr add
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9
|
||||
@ -10852,6 +10881,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset(ptr addrspace
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -11271,6 +11301,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9
|
||||
@ -11707,6 +11738,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_re
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -12124,6 +12156,7 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -12524,6 +12557,7 @@ define float @buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -12583,6 +12617,7 @@ define float @buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[16:19], 0 offen offset:1024 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -47,6 +47,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -216,6 +217,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -418,6 +420,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
@ -768,6 +771,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1018,6 +1022,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1203,6 +1208,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1384,6 +1390,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1604,6 +1611,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, v5
|
||||
; GFX942-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
|
||||
@ -1984,6 +1992,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remot
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2249,6 +2258,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2518,6 +2528,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2970,6 +2981,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3501,6 +3513,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
@ -4247,6 +4260,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v0, v5
|
||||
; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0
|
||||
@ -4766,6 +4780,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v0, v3
|
||||
; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
||||
@ -5368,7 +5383,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd
|
||||
; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4
|
||||
@ -6077,6 +6092,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no
|
||||
; GFX942-NEXT: v_mov_b32_e32 v5, v0
|
||||
; GFX942-NEXT: v_pk_max_f16 v0, v5, v5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_max_f16 v4, v0, v2
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
@ -6377,6 +6393,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_max_f16 v0, v1, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_max_f16 v0, v0, v2
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
@ -6720,6 +6737,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_pk_max_f16 v8, v6, v5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
|
||||
; GFX942-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
@ -7345,6 +7363,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9
|
||||
@ -7865,6 +7884,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -8455,6 +8475,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc
|
||||
; GFX942-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
|
||||
; GFX942-NEXT: v_max_f32_e32 v7, v7, v5
|
||||
@ -9157,6 +9178,7 @@ define float @buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_max_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9207,6 +9229,7 @@ define float @buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_max_f32_e32 v4, v0, v2
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[16:19], 0 offen offset:1024 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -47,6 +47,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -216,6 +217,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_min_f32_e32 v0, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -418,6 +420,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
@ -768,6 +771,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1018,6 +1022,7 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1203,6 +1208,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1384,6 +1390,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1604,6 +1611,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg
|
||||
; GFX942-NEXT: v_mov_b32_e32 v6, v5
|
||||
; GFX942-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
|
||||
@ -1984,6 +1992,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remot
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2249,6 +2258,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, s16
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2518,6 +2528,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2970,6 +2981,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3501,6 +3513,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX942-NEXT: v_readfirstlane_b32 s4, v0
|
||||
@ -4247,6 +4260,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_min_f32_e32 v0, v0, v5
|
||||
; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0
|
||||
@ -4766,6 +4780,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_min_f32_e32 v0, v0, v3
|
||||
; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1
|
||||
; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0
|
||||
@ -5368,7 +5383,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd
|
||||
; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4
|
||||
@ -6077,6 +6092,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no
|
||||
; GFX942-NEXT: v_mov_b32_e32 v5, v0
|
||||
; GFX942-NEXT: v_pk_max_f16 v0, v5, v5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_min_f16 v4, v0, v2
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
@ -6377,6 +6393,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_max_f16 v0, v1, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_min_f16 v0, v0, v2
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
|
||||
@ -6720,6 +6737,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: v_pk_min_f16 v8, v6, v5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
|
||||
; GFX942-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1
|
||||
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
@ -7345,6 +7363,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9
|
||||
@ -7865,6 +7884,7 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
|
||||
; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5]
|
||||
; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9
|
||||
@ -8455,6 +8475,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf
|
||||
; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
|
||||
; GFX942-NEXT: s_mov_b64 s[8:9], exec
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc
|
||||
; GFX942-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
|
||||
; GFX942-NEXT: v_min_f32_e32 v7, v7, v5
|
||||
@ -9157,6 +9178,7 @@ define float @buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_min_f32_e32 v4, v0, v2
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen offset:1024 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9207,6 +9229,7 @@ define float @buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_min_f32_e32 v4, v0, v2
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[16:19], 0 offen offset:1024 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -32,6 +32,7 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amd
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -208,6 +209,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -397,6 +399,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -590,6 +593,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -796,6 +800,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1016,6 +1021,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1237,6 +1243,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1304,6 +1311,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB6_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v0, v[4:5], v2, off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1426,6 +1434,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1492,6 +1501,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB7_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1645,6 +1655,7 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val)
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1809,6 +1820,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1960,6 +1972,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2177,6 +2190,7 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2345,6 +2359,7 @@ define float @flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2521,6 +2536,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2710,6 +2726,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2903,6 +2920,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3109,6 +3127,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3329,6 +3348,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3550,6 +3570,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -3617,6 +3638,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB18_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v0, v[4:5], v2, off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3739,6 +3761,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -3805,6 +3828,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB19_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3959,6 +3983,7 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -4026,6 +4051,7 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB20_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v0, v[4:5], v2, off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -4148,6 +4174,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -4214,6 +4241,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB21_5
|
||||
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -4367,6 +4395,7 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ig
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4531,6 +4560,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_i
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4688,6 +4718,7 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4852,6 +4883,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5009,6 +5041,7 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5185,6 +5218,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5391,6 +5425,7 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5539,6 +5574,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5752,6 +5788,7 @@ define double @flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB30_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[4:5], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6193,6 +6230,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB31_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[4:5], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6654,6 +6692,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB32_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[4:5], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7105,6 +7144,7 @@ define void @flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB33_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7539,6 +7579,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB34_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7988,6 +8029,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB35_5
|
||||
; GFX942-NEXT: ; %bb.4: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8462,6 +8504,7 @@ define half @flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr %
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8834,6 +8877,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9215,6 +9259,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9588,6 +9633,7 @@ define void @flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9948,6 +9994,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10317,6 +10364,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10651,6 +10699,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX942-NEXT: v_add_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10925,6 +10974,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_add_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11241,6 +11291,7 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -11390,6 +11441,7 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -11622,6 +11674,7 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -11766,6 +11819,7 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -12026,6 +12080,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12482,6 +12537,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12948,6 +13004,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13409,6 +13466,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13861,6 +13919,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14282,6 +14341,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14649,6 +14709,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15037,6 +15098,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15485,6 +15547,7 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15670,6 +15733,7 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15950,6 +16014,7 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -16129,6 +16194,7 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -16284,6 +16350,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16466,6 +16533,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16656,6 +16724,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16853,6 +16922,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17026,6 +17096,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17209,6 +17280,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17403,6 +17475,7 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -17469,6 +17542,7 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -17593,6 +17667,7 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -17657,6 +17732,7 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -17775,6 +17851,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17957,6 +18034,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr %pt
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18130,6 +18208,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18312,6 +18391,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18489,6 +18569,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18815,6 +18896,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19149,6 +19231,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19491,6 +19574,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19805,6 +19889,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20129,6 +20214,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_nop 1
|
||||
; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20467,6 +20553,7 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -20637,6 +20724,7 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -20801,6 +20889,7 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -20967,6 +21056,7 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -21124,6 +21214,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory(
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -21450,6 +21541,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr %p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -21764,6 +21856,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -22090,6 +22183,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -40,6 +40,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -182,6 +183,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -336,6 +338,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -494,6 +497,7 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -634,6 +638,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -787,6 +792,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -948,6 +954,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -995,6 +1002,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1099,6 +1107,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1147,6 +1156,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1248,6 +1258,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1440,6 +1451,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amd
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1586,6 +1598,7 @@ define float @flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1728,6 +1741,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1882,6 +1896,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2040,6 +2055,7 @@ define void @flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memor
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2180,6 +2196,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2333,6 +2350,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2494,6 +2512,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2541,6 +2560,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2645,6 +2665,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2693,6 +2714,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2851,6 +2873,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB18_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3247,6 +3270,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3659,6 +3683,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4063,6 +4088,7 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB21_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4453,6 +4479,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB22_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4858,6 +4885,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB23_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5257,6 +5285,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB24_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5680,6 +5709,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB25_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6109,6 +6139,7 @@ define half @flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr %
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6504,6 +6535,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6910,6 +6942,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7305,6 +7338,7 @@ define void @flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7688,6 +7722,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8082,6 +8117,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8441,6 +8477,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8738,6 +8775,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX942-NEXT: v_max_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9071,6 +9109,7 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9231,6 +9270,7 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9477,6 +9517,7 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9632,6 +9673,7 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9896,6 +9938,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10353,6 +10396,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10820,6 +10864,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11276,6 +11321,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11719,6 +11765,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12172,6 +12219,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12594,6 +12642,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12962,6 +13011,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13363,6 +13413,7 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13548,6 +13599,7 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13829,6 +13881,7 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -14008,6 +14061,7 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -14192,6 +14246,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memo
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14422,6 +14477,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14663,6 +14719,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14906,6 +14963,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15126,6 +15184,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15359,6 +15418,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15601,6 +15661,7 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15681,6 +15742,7 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15838,6 +15900,7 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15916,6 +15979,7 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -16159,6 +16223,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_m
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16608,6 +16673,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17068,6 +17134,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v2, v0, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17528,6 +17595,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17960,6 +18028,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18405,6 +18474,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18866,6 +18936,7 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19044,6 +19115,7 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19320,6 +19392,7 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19493,6 +19566,7 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -40,6 +40,7 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -182,6 +183,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -336,6 +338,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -494,6 +497,7 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -634,6 +638,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -787,6 +792,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -948,6 +954,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -995,6 +1002,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1099,6 +1107,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1147,6 +1156,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1248,6 +1258,7 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1440,6 +1451,7 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amd
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1586,6 +1598,7 @@ define float @flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1728,6 +1741,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1882,6 +1896,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_max_f32_e32 v0, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2040,6 +2055,7 @@ define void @flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memor
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2180,6 +2196,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2333,6 +2350,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2494,6 +2512,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2541,6 +2560,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2645,6 +2665,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2693,6 +2714,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2851,6 +2873,7 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB18_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3247,6 +3270,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3659,6 +3683,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4063,6 +4088,7 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB21_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4453,6 +4479,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB22_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4858,6 +4885,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB23_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5257,6 +5285,7 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr %ptr,
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB24_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5680,6 +5709,7 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX942-NEXT: .LBB25_3: ; %atomicrmw.global
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6109,6 +6139,7 @@ define half @flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr %
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6504,6 +6535,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6910,6 +6942,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7305,6 +7338,7 @@ define void @flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7688,6 +7722,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8082,6 +8117,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8441,6 +8477,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_min_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8738,6 +8775,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX942-NEXT: v_min_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9071,6 +9109,7 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9231,6 +9270,7 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9477,6 +9517,7 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9632,6 +9673,7 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9896,6 +9938,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10353,6 +10396,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10820,6 +10864,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11276,6 +11321,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11719,6 +11765,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12172,6 +12219,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12594,6 +12642,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12962,6 +13011,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13363,6 +13413,7 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13548,6 +13599,7 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13829,6 +13881,7 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -14008,6 +14061,7 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -14192,6 +14246,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memo
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14422,6 +14477,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14663,6 +14719,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v0, v1
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14906,6 +14963,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15126,6 +15184,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15359,6 +15418,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15601,6 +15661,7 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15681,6 +15742,7 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15838,6 +15900,7 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15916,6 +15979,7 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -16159,6 +16223,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_m
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16608,6 +16673,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17068,6 +17134,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v2, v0, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17528,6 +17595,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17960,6 +18028,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18405,6 +18474,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18866,6 +18936,7 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19044,6 +19115,7 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19320,6 +19392,7 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19493,6 +19566,7 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -55,6 +55,7 @@ define float @flat_agent_atomic_fsub_ret_f32(ptr %ptr, float %val) #0 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -250,6 +251,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -455,6 +457,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v6, v7, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -661,6 +664,7 @@ define void @flat_agent_atomic_fsub_noret_f32(ptr %ptr, float %val) #0 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -846,6 +850,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1044,6 +1049,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1252,6 +1258,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1326,6 +1333,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1452,6 +1460,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1523,6 +1532,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1652,6 +1662,7 @@ define float @flat_agent_atomic_fsub_ret_f32__ftz(ptr %ptr, float %val) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1847,6 +1858,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2052,6 +2064,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v6, v7, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2258,6 +2271,7 @@ define void @flat_agent_atomic_fsub_noret_f32__ftz(ptr %ptr, float %val) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2443,6 +2457,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2641,6 +2656,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2849,6 +2865,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2923,6 +2940,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3049,6 +3067,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -3120,6 +3139,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3281,6 +3301,7 @@ define double @flat_agent_atomic_fsub_ret_f64(ptr %ptr, double %val) #0 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3707,6 +3728,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4153,6 +4175,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4590,6 +4613,7 @@ define void @flat_agent_atomic_fsub_noret_f64(ptr %ptr, double %val) #0 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5011,6 +5035,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5447,6 +5472,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5909,6 +5935,7 @@ define half @flat_agent_atomic_fsub_ret_f16(ptr %ptr, half %val) #0 {
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6281,6 +6308,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6662,6 +6690,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7035,6 +7064,7 @@ define void @flat_agent_atomic_fsub_noret_f16(ptr %ptr, half %val) #0 {
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7395,6 +7425,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7764,6 +7795,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8101,6 +8133,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
|
||||
; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8378,6 +8411,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
|
||||
; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8688,6 +8722,7 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8837,6 +8872,7 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9069,6 +9105,7 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9213,6 +9250,7 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9473,6 +9511,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16(ptr %ptr, bfloat %val) #0 {
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9929,6 +9968,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10395,6 +10435,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10850,6 +10891,7 @@ define void @flat_agent_atomic_fsub_noret_bf16(ptr %ptr, bfloat %val) #0 {
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11292,6 +11334,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11744,6 +11787,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12165,6 +12209,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12532,6 +12577,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12932,6 +12978,7 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13117,6 +13164,7 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13397,6 +13445,7 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13576,6 +13625,7 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13754,6 +13804,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16(ptr %ptr, <2 x half> %val) #
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13967,6 +14018,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14191,6 +14243,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v6, v7, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14416,6 +14469,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16(ptr %ptr, <2 x half> %val) #0 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14617,6 +14671,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14831,6 +14886,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15055,6 +15111,7 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15129,6 +15186,7 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15274,6 +15332,7 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15345,6 +15404,7 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15582,6 +15642,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16(ptr %ptr, <2 x bfloat> %v
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16031,6 +16092,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16491,6 +16553,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v2, v0, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16951,6 +17014,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16(ptr %ptr, <2 x bfloat> %val) #0
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17383,6 +17447,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17828,6 +17893,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18289,6 +18355,7 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -18467,6 +18534,7 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -18743,6 +18811,7 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -18916,6 +18985,7 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -25,6 +25,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -37,6 +38,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -66,6 +68,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_2047(ptr inreg %sbase, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v[0:1], v2 offset:2047
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -78,6 +81,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_2047(ptr inreg %sbase, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v[2:3], v1 offset:2047
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -111,6 +115,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_neg2048(ptr inreg %sbase
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -126,6 +131,7 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_neg2048(ptr inreg %sbase
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -155,6 +161,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -167,6 +174,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -196,6 +204,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_2048(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:2048 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -208,6 +217,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_2048(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v0, v[2:3], v1 offset:2048 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -241,6 +251,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_neg2048(ptr inreg %sbase, i32 %v
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -256,6 +267,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_neg2048(ptr inreg %sbase, i32 %v
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -317,6 +329,7 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -331,6 +344,7 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -386,6 +400,7 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:42 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -400,6 +415,7 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v0, v[2:3], v1 offset:42 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -456,6 +472,7 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -470,6 +487,7 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -524,6 +542,7 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap v[0:1], v2 offset:42
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -538,6 +557,7 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap v[2:3], v1 offset:42
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -671,6 +691,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB10_5
|
||||
; GFX950-SDAG-NEXT: .LBB10_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -712,6 +733,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB10_5
|
||||
; GFX950-GISEL-NEXT: .LBB10_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -859,6 +881,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB11_5
|
||||
; GFX950-SDAG-NEXT: .LBB11_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -903,6 +926,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB11_5
|
||||
; GFX950-GISEL-NEXT: .LBB11_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1023,6 +1047,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB12_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1056,6 +1081,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB12_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1177,6 +1203,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB13_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1214,6 +1241,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB13_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1256,6 +1284,7 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1268,6 +1297,7 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1300,6 +1330,7 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1315,6 +1346,7 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1345,6 +1377,7 @@ define amdgpu_ps void @flat_add_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1357,6 +1390,7 @@ define amdgpu_ps void @flat_add_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1388,6 +1422,7 @@ define amdgpu_ps void @flat_add_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1403,6 +1438,7 @@ define amdgpu_ps void @flat_add_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1527,6 +1563,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB18_5
|
||||
; GFX950-SDAG-NEXT: .LBB18_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1569,6 +1606,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB18_5
|
||||
; GFX950-GISEL-NEXT: .LBB18_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1719,6 +1757,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB19_5
|
||||
; GFX950-SDAG-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1764,6 +1803,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB19_5
|
||||
; GFX950-GISEL-NEXT: .LBB19_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -1893,6 +1933,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -1929,6 +1970,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB20_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2061,6 +2103,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB21_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_add_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2101,6 +2144,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB21_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_add_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2148,6 +2192,7 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2160,6 +2205,7 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2192,6 +2238,7 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2207,6 +2254,7 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2237,6 +2285,7 @@ define amdgpu_ps void @flat_sub_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2249,6 +2298,7 @@ define amdgpu_ps void @flat_sub_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2280,6 +2330,7 @@ define amdgpu_ps void @flat_sub_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2295,6 +2346,7 @@ define amdgpu_ps void @flat_sub_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2419,6 +2471,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB26_5
|
||||
; GFX950-SDAG-NEXT: .LBB26_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2463,6 +2516,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB26_5
|
||||
; GFX950-GISEL-NEXT: .LBB26_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2613,6 +2667,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB27_5
|
||||
; GFX950-SDAG-NEXT: .LBB27_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2660,6 +2715,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB27_5
|
||||
; GFX950-GISEL-NEXT: .LBB27_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2789,6 +2845,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB28_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -2827,6 +2884,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB28_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -2959,6 +3017,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB29_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3001,6 +3060,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB29_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_sub_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3048,6 +3108,7 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3060,6 +3121,7 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3092,6 +3154,7 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3107,6 +3170,7 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3137,6 +3201,7 @@ define amdgpu_ps void @flat_and_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3149,6 +3214,7 @@ define amdgpu_ps void @flat_and_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3180,6 +3246,7 @@ define amdgpu_ps void @flat_and_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3195,6 +3262,7 @@ define amdgpu_ps void @flat_and_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3321,6 +3389,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB34_5
|
||||
; GFX950-SDAG-NEXT: .LBB34_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3364,6 +3433,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB34_5
|
||||
; GFX950-GISEL-NEXT: .LBB34_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3515,6 +3585,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB35_5
|
||||
; GFX950-SDAG-NEXT: .LBB35_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3561,6 +3632,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB35_5
|
||||
; GFX950-GISEL-NEXT: .LBB35_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3691,6 +3763,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB36_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3728,6 +3801,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB36_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3861,6 +3935,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB37_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_and_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3902,6 +3977,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB37_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_and_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3948,6 +4024,7 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i3
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -3960,6 +4037,7 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i3
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -3992,6 +4070,7 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4007,6 +4086,7 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4037,6 +4117,7 @@ define amdgpu_ps void @flat_or_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4049,6 +4130,7 @@ define amdgpu_ps void @flat_or_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4080,6 +4162,7 @@ define amdgpu_ps void @flat_or_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4095,6 +4178,7 @@ define amdgpu_ps void @flat_or_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4221,6 +4305,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB42_5
|
||||
; GFX950-SDAG-NEXT: .LBB42_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4264,6 +4349,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB42_5
|
||||
; GFX950-GISEL-NEXT: .LBB42_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4415,6 +4501,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB43_5
|
||||
; GFX950-SDAG-NEXT: .LBB43_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4461,6 +4548,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB43_5
|
||||
; GFX950-GISEL-NEXT: .LBB43_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4591,6 +4679,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB44_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4628,6 +4717,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB44_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4761,6 +4851,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB45_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_or_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4802,6 +4893,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB45_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_or_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4848,6 +4940,7 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4860,6 +4953,7 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4892,6 +4986,7 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor v0, v[0:1], v2 sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4907,6 +5002,7 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor v0, v[2:3], v1 sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4937,6 +5033,7 @@ define amdgpu_ps void @flat_xor_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4949,6 +5046,7 @@ define amdgpu_ps void @flat_xor_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -4980,6 +5078,7 @@ define amdgpu_ps void @flat_xor_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor v[0:1], v2
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -4995,6 +5094,7 @@ define amdgpu_ps void @flat_xor_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor v[2:3], v1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -5121,6 +5221,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB50_5
|
||||
; GFX950-SDAG-NEXT: .LBB50_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -5164,6 +5265,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB50_5
|
||||
; GFX950-GISEL-NEXT: .LBB50_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -5315,6 +5417,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB51_5
|
||||
; GFX950-SDAG-NEXT: .LBB51_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor_x2 v[0:1], v[4:5], v[2:3] sc0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -5361,6 +5464,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB51_5
|
||||
; GFX950-GISEL-NEXT: .LBB51_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[4:5] sc0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -5491,6 +5595,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB52_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -5528,6 +5633,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB52_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -5661,6 +5767,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB53_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3]
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc1
|
||||
@ -5702,6 +5809,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB53_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_xor_x2 v[0:1], v[4:5]
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc1
|
||||
@ -9093,6 +9201,7 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffse
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9106,6 +9215,7 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffse
|
||||
; GFX950-GISEL-NEXT: s_nop 0
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9140,6 +9250,7 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9156,6 +9267,7 @@ define amdgpu_ps float @flat_cmpxchg_saddr_i32_rtn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap v0, v[0:1], v[2:3] sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9188,6 +9300,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9201,6 +9314,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-GISEL-NEXT: s_nop 0
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9233,6 +9347,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i32_nortn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-SDAG-NEXT: s_nop 1
|
||||
; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9249,6 +9364,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i32_nortn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-GISEL-NEXT: s_nop 1
|
||||
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9379,6 +9495,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB90_5
|
||||
; GFX950-SDAG-NEXT: .LBB90_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9426,6 +9543,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB90_5
|
||||
; GFX950-GISEL-NEXT: .LBB90_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9584,6 +9702,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
|
||||
; GFX950-SDAG-NEXT: s_branch .LBB91_5
|
||||
; GFX950-SDAG-NEXT: .LBB91_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9634,6 +9753,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
|
||||
; GFX950-GISEL-NEXT: s_branch .LBB91_5
|
||||
; GFX950-GISEL-NEXT: .LBB91_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9771,6 +9891,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB92_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:7] sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9812,6 +9933,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB92_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:9] sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
@ -9951,6 +10073,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-SDAG-NEXT: s_endpgm
|
||||
; GFX950-SDAG-NEXT: .LBB93_3: ; %atomicrmw.global
|
||||
; GFX950-SDAG-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:7] sc1
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: buffer_inv sc0 sc1
|
||||
@ -9996,6 +10119,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
; GFX950-GISEL-NEXT: .LBB93_3: ; %atomicrmw.global
|
||||
; GFX950-GISEL-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:9] sc1
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: buffer_inv sc0 sc1
|
||||
|
||||
@ -14,6 +14,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -57,6 +58,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -97,6 +99,7 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
|
||||
@ -1483,7 +1483,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1496,7 +1496,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1540,7 +1540,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1573,7 +1573,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1586,7 +1586,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1630,7 +1630,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1662,6 +1662,7 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1673,6 +1674,7 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1712,6 +1714,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1742,6 +1745,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1753,6 +1757,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1806,7 +1811,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1840,6 +1845,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1853,6 +1859,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1897,6 +1904,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1930,6 +1938,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1943,6 +1952,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1974,6 +1984,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1985,6 +1996,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2024,6 +2036,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2054,6 +2067,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2066,6 +2080,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2121,6 +2136,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -48,6 +48,7 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -245,6 +246,7 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -444,6 +446,7 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -653,6 +656,7 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -844,6 +848,7 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1038,6 +1043,7 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1241,6 +1247,7 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1290,6 +1297,7 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1443,6 +1451,7 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1491,6 +1500,7 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1639,6 +1649,7 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote(ptr addrspace(1) %pt
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1854,6 +1865,7 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2069,6 +2081,7 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_deno
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2277,6 +2290,7 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2476,6 +2490,7 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2661,6 +2676,7 @@ define float @global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode(ptr a
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2876,6 +2892,7 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote(ptr addrspace(1) %p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3084,6 +3101,7 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3278,6 +3296,7 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3446,6 +3465,7 @@ define void @global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3654,6 +3674,7 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr addr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3867,6 +3888,7 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr add
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4072,6 +4094,7 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4285,6 +4308,7 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4490,6 +4514,7 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4687,6 +4712,7 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4882,6 +4908,7 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5065,6 +5092,7 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5250,6 +5278,7 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5445,6 +5474,7 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5610,6 +5640,7 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5778,6 +5809,7 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_f
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5955,6 +5987,7 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -5998,6 +6031,7 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -6143,6 +6177,7 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -6185,6 +6220,7 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -6313,6 +6349,7 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_f
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6498,6 +6535,7 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6666,6 +6704,7 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6879,6 +6918,7 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory(pt
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7084,6 +7124,7 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7267,6 +7308,7 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7453,6 +7495,7 @@ define double @global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7687,6 +7730,7 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7922,6 +7966,7 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8159,6 +8204,7 @@ define void @global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8375,6 +8421,7 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:2040
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8594,6 +8641,7 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8975,6 +9023,7 @@ define half @global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9483,6 +9532,7 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10003,6 +10053,7 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10510,6 +10561,7 @@ define void @global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11002,6 +11054,7 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11506,6 +11559,7 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11957,6 +12011,7 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_
|
||||
; GFX942-NEXT: v_add_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12335,6 +12390,7 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_n
|
||||
; GFX942-NEXT: v_add_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12768,6 +12824,7 @@ define half @global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -12917,6 +12974,7 @@ define half @global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13283,6 +13341,7 @@ define void @global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13427,6 +13486,7 @@ define void @global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13827,6 +13887,7 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14424,6 +14485,7 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15035,6 +15097,7 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15631,6 +15694,7 @@ define void @global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16213,6 +16277,7 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16809,6 +16874,7 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17351,6 +17417,7 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17828,6 +17895,7 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18358,6 +18426,7 @@ define bfloat @global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -18543,6 +18612,7 @@ define bfloat @global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -18966,6 +19036,7 @@ define void @global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19145,6 +19216,7 @@ define void @global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19364,6 +19436,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19596,6 +19669,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19830,6 +19904,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20072,6 +20147,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20280,6 +20356,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20491,6 +20568,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -20711,6 +20789,7 @@ define <2 x half> @global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -20770,6 +20849,7 @@ define <2 x half> @global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -20948,6 +21028,7 @@ define void @global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -21004,6 +21085,7 @@ define void @global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -21161,6 +21243,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -21407,6 +21490,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr a
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -21641,6 +21725,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -21873,6 +21958,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -22081,6 +22167,7 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__maybe_remote(ptr addrspac
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -22327,6 +22414,7 @@ define void @global_agent_atomic_fadd_noret_v2f16__maybe_remote(ptr addrspace(1)
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -22565,6 +22653,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -22961,6 +23050,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -23359,6 +23449,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -23765,6 +23856,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -24145,6 +24237,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -24528,6 +24621,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fin
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -24920,6 +25014,7 @@ define <2 x bfloat> @global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -25089,6 +25184,7 @@ define <2 x bfloat> @global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -25321,6 +25417,7 @@ define void @global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -25485,6 +25582,7 @@ define void @global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -25706,6 +25804,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -26102,6 +26201,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -26482,6 +26582,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -26878,6 +26979,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -27258,6 +27360,7 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__maybe_remote(ptr addrs
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -27654,6 +27757,7 @@ define void @global_agent_atomic_fadd_noret_v2bf16__maybe_remote(ptr addrspace(1
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
|
||||
@ -41,6 +41,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -202,6 +203,7 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -365,6 +367,7 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -528,6 +531,7 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -683,6 +687,7 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -841,6 +846,7 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1000,6 +1006,7 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1045,6 +1052,7 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1166,6 +1174,7 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1210,6 +1219,7 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1326,6 +1336,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr addr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1556,6 +1567,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__a
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1721,6 +1733,7 @@ define float @global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1882,6 +1895,7 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2045,6 +2059,7 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2208,6 +2223,7 @@ define void @global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2363,6 +2379,7 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2521,6 +2538,7 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2680,6 +2698,7 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2725,6 +2744,7 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2846,6 +2866,7 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2890,6 +2911,7 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3021,6 +3043,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3193,6 +3216,7 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3366,6 +3390,7 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3538,6 +3563,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3700,6 +3726,7 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:2040
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3865,6 +3892,7 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4031,6 +4059,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr add
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4278,6 +4307,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4532,6 +4562,7 @@ define half @global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4975,6 +5006,7 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5431,6 +5463,7 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5876,6 +5909,7 @@ define void @global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6307,6 +6341,7 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6750,6 +6785,7 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7158,6 +7194,7 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_
|
||||
; GFX942-NEXT: v_max_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7494,6 +7531,7 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_n
|
||||
; GFX942-NEXT: v_max_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7863,6 +7901,7 @@ define half @global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8023,6 +8062,7 @@ define half @global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -8319,6 +8359,7 @@ define void @global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8474,6 +8515,7 @@ define void @global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -8787,6 +8829,7 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9295,6 +9338,7 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9815,6 +9859,7 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10324,6 +10369,7 @@ define void @global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10818,6 +10864,7 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11323,6 +11370,7 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11797,6 +11845,7 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12207,6 +12256,7 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12647,6 +12697,7 @@ define bfloat @global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -12832,6 +12883,7 @@ define bfloat @global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13166,6 +13218,7 @@ define void @global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13345,6 +13398,7 @@ define void @global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13581,6 +13635,7 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13862,6 +13917,7 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14145,6 +14201,7 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14435,6 +14492,7 @@ define void @global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14703,6 +14761,7 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14974,6 +15033,7 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15255,6 +15315,7 @@ define <2 x half> @global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15334,6 +15395,7 @@ define <2 x half> @global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15540,6 +15602,7 @@ define void @global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15616,6 +15679,7 @@ define void @global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15905,6 +15969,7 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16411,6 +16476,7 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16919,6 +16985,7 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17431,6 +17498,7 @@ define void @global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17916,6 +17984,7 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18404,6 +18473,7 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18906,6 +18976,7 @@ define <2 x bfloat> @global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19083,6 +19154,7 @@ define <2 x bfloat> @global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19414,6 +19486,7 @@ define void @global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19585,6 +19658,7 @@ define void @global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -41,6 +41,7 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -202,6 +203,7 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -365,6 +367,7 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -528,6 +531,7 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -683,6 +687,7 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -841,6 +846,7 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1000,6 +1006,7 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1045,6 +1052,7 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1166,6 +1174,7 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1210,6 +1219,7 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1326,6 +1336,7 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr addr
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1556,6 +1567,7 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__a
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1721,6 +1733,7 @@ define float @global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1882,6 +1895,7 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2045,6 +2059,7 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2208,6 +2223,7 @@ define void @global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2363,6 +2379,7 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2521,6 +2538,7 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2680,6 +2698,7 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2725,6 +2744,7 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -2846,6 +2866,7 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX942-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX942-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -2890,6 +2911,7 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX90A-NEXT: v_max_f32_e32 v2, v3, v3
|
||||
; GFX90A-NEXT: v_min_f32_e32 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3021,6 +3043,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3193,6 +3216,7 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3366,6 +3390,7 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3538,6 +3563,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3700,6 +3726,7 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:2040
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3865,6 +3892,7 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:-2048
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4031,6 +4059,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr add
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4278,6 +4307,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4532,6 +4562,7 @@ define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4975,6 +5006,7 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5431,6 +5463,7 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5876,6 +5909,7 @@ define void @global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6307,6 +6341,7 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6750,6 +6785,7 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7158,6 +7194,7 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_
|
||||
; GFX942-NEXT: v_min_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7494,6 +7531,7 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_n
|
||||
; GFX942-NEXT: v_min_f16_e32 v2, v2, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7863,6 +7901,7 @@ define half @global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8023,6 +8062,7 @@ define half @global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -8319,6 +8359,7 @@ define void @global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8474,6 +8515,7 @@ define void @global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v2, v4, v2
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -8787,6 +8829,7 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9295,6 +9338,7 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9815,6 +9859,7 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10324,6 +10369,7 @@ define void @global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10818,6 +10864,7 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11323,6 +11370,7 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11797,6 +11845,7 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12207,6 +12256,7 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12647,6 +12697,7 @@ define bfloat @global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -12832,6 +12883,7 @@ define bfloat @global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13166,6 +13218,7 @@ define void @global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13345,6 +13398,7 @@ define void @global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13581,6 +13635,7 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13862,6 +13917,7 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14145,6 +14201,7 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14435,6 +14492,7 @@ define void @global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14703,6 +14761,7 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14974,6 +15033,7 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15255,6 +15315,7 @@ define <2 x half> @global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15334,6 +15395,7 @@ define <2 x half> @global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15540,6 +15602,7 @@ define void @global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942-NEXT: s_nop 0
|
||||
; GFX942-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15616,6 +15679,7 @@ define void @global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX90A-NEXT: v_pk_max_f16 v2, v3, v3
|
||||
; GFX90A-NEXT: v_pk_min_f16 v2, v2, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15905,6 +15969,7 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16411,6 +16476,7 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16919,6 +16985,7 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17431,6 +17498,7 @@ define void @global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17916,6 +17984,7 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18404,6 +18473,7 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fin
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18906,6 +18976,7 @@ define <2 x bfloat> @global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19083,6 +19154,7 @@ define <2 x bfloat> @global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19414,6 +19486,7 @@ define void @global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19585,6 +19658,7 @@ define void @global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -56,6 +56,7 @@ define float @global_agent_atomic_fsub_ret_f32(ptr addrspace(1) %ptr, float %val
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -287,6 +288,7 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1) %
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -520,6 +522,7 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg(ptr addrspace(1) %
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -761,6 +764,7 @@ define void @global_agent_atomic_fsub_noret_f32(ptr addrspace(1) %ptr, float %va
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -981,6 +985,7 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1204,6 +1209,7 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -1438,6 +1444,7 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1511,6 +1518,7 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1672,6 +1680,7 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -1741,6 +1750,7 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1)
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -1903,6 +1913,7 @@ define float @global_agent_atomic_fsub_ret_f32__ftz(ptr addrspace(1) %ptr, float
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2134,6 +2145,7 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspace
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2367,6 +2379,7 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr addrspace
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2608,6 +2621,7 @@ define void @global_agent_atomic_fsub_noret_f32__ftz(ptr addrspace(1) %ptr, floa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -2828,6 +2842,7 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspac
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3051,6 +3066,7 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr addrspac
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -3285,6 +3301,7 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspac
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -3358,6 +3375,7 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspac
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3519,6 +3537,7 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -3588,6 +3607,7 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspa
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_sub_f32_e32 v4, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -3750,6 +3770,7 @@ define double @global_agent_atomic_fsub_ret_f64(ptr addrspace(1) %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4001,6 +4022,7 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4253,6 +4275,7 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4507,6 +4530,7 @@ define void @global_agent_atomic_fsub_noret_f64(ptr addrspace(1) %ptr, double %v
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4736,6 +4760,7 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -4968,6 +4993,7 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5281,6 +5307,7 @@ define half @global_agent_atomic_fsub_ret_f16(ptr addrspace(1) %ptr, half %val)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -5701,6 +5728,7 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %p
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6132,6 +6160,7 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_neg(ptr addrspace(1) %p
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6555,6 +6584,7 @@ define void @global_agent_atomic_fsub_noret_f16(ptr addrspace(1) %ptr, half %val
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -6963,6 +6993,7 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7381,6 +7412,7 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -7767,6 +7799,7 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr addrspa
|
||||
; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8083,6 +8116,7 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr addrs
|
||||
; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -8429,6 +8463,7 @@ define half @global_system_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -8578,6 +8613,7 @@ define half @global_system_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -8860,6 +8896,7 @@ define void @global_system_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -9004,6 +9041,7 @@ define void @global_system_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1)
|
||||
; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v3, v4
|
||||
; GFX90A-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -9313,6 +9351,7 @@ define bfloat @global_agent_atomic_fsub_ret_bf16(ptr addrspace(1) %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -9819,6 +9858,7 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10337,6 +10377,7 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -10844,6 +10885,7 @@ define void @global_agent_atomic_fsub_noret_bf16(ptr addrspace(1) %ptr, bfloat %
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11336,6 +11378,7 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -11839,6 +11882,7 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr addrspace(1)
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12311,6 +12355,7 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr addr
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -12719,6 +12764,7 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr addr
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -13157,6 +13203,7 @@ define bfloat @global_system_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13342,6 +13389,7 @@ define bfloat @global_system_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -13674,6 +13722,7 @@ define void @global_system_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1
|
||||
; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -13853,6 +13902,7 @@ define void @global_system_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1
|
||||
; GFX90A-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; GFX90A-NEXT: v_and_or_b32 v2, v3, v5, v2
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -14082,6 +14132,7 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16(ptr addrspace(1) %ptr, <2
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14346,6 +14397,7 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrspa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14612,6 +14664,7 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr addrspa
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -14884,6 +14937,7 @@ define void @global_agent_atomic_fsub_noret_v2f16(ptr addrspace(1) %ptr, <2 x ha
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15133,6 +15187,7 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15385,6 +15440,7 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr addrspace(1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -15648,6 +15704,7 @@ define <2 x half> @global_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrsp
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15721,6 +15778,7 @@ define <2 x half> @global_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrsp
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -15915,6 +15973,7 @@ define void @global_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -15984,6 +16043,7 @@ define void @global_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -16267,6 +16327,7 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16(ptr addrspace(1) %ptr,
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -16773,6 +16834,7 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr addr
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17281,6 +17343,7 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr addr
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -17793,6 +17856,7 @@ define void @global_agent_atomic_fsub_noret_v2bf16(ptr addrspace(1) %ptr, <2 x b
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18278,6 +18342,7 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace(
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -18766,6 +18831,7 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr addrspace(
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -19268,6 +19334,7 @@ define <2 x bfloat> @global_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr add
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19445,6 +19512,7 @@ define <2 x bfloat> @global_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr add
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -19776,6 +19844,7 @@ define void @global_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace
|
||||
; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1]
|
||||
; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5
|
||||
; GFX942-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc0 sc1
|
||||
@ -19947,6 +20016,7 @@ define void @global_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace
|
||||
; GFX90A-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc
|
||||
; GFX90A-NEXT: v_perm_b32 v2, v6, v2, s9
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942 %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='require<libcall-lowering-info>,atomic-expand' < %s | FileCheck --check-prefix=OPT %s
|
||||
|
||||
@ -13,7 +13,6 @@ define i32 @global_agent_monotonic_idempotent_or(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("agent-one-as") monotonic, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") monotonic, align 4
|
||||
ret i32 %val
|
||||
@ -31,7 +30,6 @@ define i32 @global_agent_acquire_idempotent_or(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("agent-one-as") acquire, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") acquire, align 4
|
||||
ret i32 %val
|
||||
@ -43,6 +41,7 @@ define i32 @global_agent_release_idempotent_or(ptr addrspace(1) %in) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -50,7 +49,6 @@ define i32 @global_agent_release_idempotent_or(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") release, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") release, align 4
|
||||
ret i32 %val
|
||||
@ -62,6 +60,7 @@ define i32 @global_agent_release_idempotent_or_no_remote(ptr addrspace(1) %in) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -80,6 +79,7 @@ define i32 @global_agent_release_idempotent_or_no_fine_grained(ptr addrspace(1)
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -98,6 +98,7 @@ define i32 @global_agent_acquire_release_idempotent_or(ptr addrspace(1) %in) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -106,7 +107,6 @@ define i32 @global_agent_acquire_release_idempotent_or(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") acq_rel, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") acq_rel, align 4
|
||||
ret i32 %val
|
||||
@ -118,6 +118,7 @@ define i32 @global_agent_acquire_release_idempotent_or__no_fine_grained(ptr addr
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -137,6 +138,7 @@ define i32 @global_agent_seq_cst_idempotent_or(ptr addrspace(1) %in) {
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX942-NEXT: buffer_wbl2 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: buffer_inv sc1
|
||||
@ -145,7 +147,6 @@ define i32 @global_agent_seq_cst_idempotent_or(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") seq_cst, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") seq_cst, align 4
|
||||
ret i32 %val
|
||||
@ -162,7 +163,6 @@ define i32 @global_agent_monotonic_idempotent_add(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw add ptr addrspace(1) %in, i32 0 syncscope("workgroup") monotonic, align 4
|
||||
ret i32 %val
|
||||
@ -179,7 +179,6 @@ define i32 @global_agent_monotonic_idempotent_add__no_fine_grained(ptr addrspace
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw add ptr addrspace(1) %in, i32 0 syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret i32 %val
|
||||
@ -196,7 +195,6 @@ define i32 @global_agent_monotonic_idempotent_sub(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw sub ptr addrspace(1) %in, i32 0 syncscope("wavefront") monotonic, align 4
|
||||
ret i32 %val
|
||||
@ -213,7 +211,6 @@ define i32 @global_agent_monotonic_idempotent_sub__no_fine_grained(ptr addrspace
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw sub ptr addrspace(1) %in, i32 0 syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret i32 %val
|
||||
@ -230,7 +227,6 @@ define i32 @global_system_monotonic_idempotent_xor(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw xor ptr addrspace(1) %in, i32 0 monotonic, align 4
|
||||
ret i32 %val
|
||||
@ -247,7 +243,6 @@ define i32 @global_system_monotonic_idempotent_xor__no_fine_grained(ptr addrspac
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw xor ptr addrspace(1) %in, i32 0 monotonic, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret i32 %val
|
||||
@ -264,7 +259,6 @@ define i32 @global_agent_monotonic_idempotent_and(ptr addrspace(1) %in) {
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw and ptr addrspace(1) %in, i32 -1 syncscope("singlethread") monotonic, align 4
|
||||
ret i32 %val
|
||||
@ -281,7 +275,6 @@ define i32 @global_agent_monotonic_idempotent_and_no_fined_grain(ptr addrspace(1
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
|
||||
; OPT-NEXT: ret i32 [[VAL]]
|
||||
;
|
||||
entry:
|
||||
%val = atomicrmw and ptr addrspace(1) %in, i32 -1 syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret i32 %val
|
||||
|
||||
@ -168,6 +168,7 @@ define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind {
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_bfi_b32 v2, v3, -5, -1
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
@ -822,6 +823,7 @@ define void @flat_atomic_xchg_i32_noret(ptr %ptr, i32 %in) {
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_atomic_swap v[0:1], v2
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: buffer_invl2
|
||||
|
||||
@ -103,9 +103,10 @@
|
||||
; GCN-NEXT: ; implicit-def: $vgpr197
|
||||
; GCN-NEXT: ; iglp_opt mask(0x00000002)
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[64:67]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[68:71] offset:1024
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[160:163], v226, s[8:11], 0 offen offset:64 sc0 sc1
|
||||
@ -150,10 +151,11 @@
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[64:65], v[152:153], 0
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[160:163]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[66:67], v[154:155], v[80:95]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[164:167] offset:1024
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[168:169], v[152:153], 0
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[170:171], v[154:155], v[64:79]
|
||||
@ -199,9 +201,10 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[152:155]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[160:163] offset:1024
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[152:155], v226, s[8:11], 0 offen offset:192 sc0 sc1
|
||||
@ -280,9 +283,10 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[152:155]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v230, v[226:229] offset:1024
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
@ -322,15 +326,16 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v199, v[238:239]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v200, v[240:241]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v201, v[242:243]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v202, v[244:245]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx2 v[192:193], v247, s[0:3], 0 offen sc0 sc1
|
||||
@ -649,15 +654,16 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v199, v[188:189]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v200, v[190:191]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v201, v[192:193]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v202, v[194:195]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[146:147], v[126:127], v[32:47]
|
||||
; GCN-NEXT: v_exp_f32_e32 v101, v125
|
||||
@ -792,16 +798,17 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v199, v[126:127]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v200, v[150:151]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[144:145], v[0:15]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v201, v[152:153]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v202, v[154:155]
|
||||
; GCN-NEXT: v_fma_f32 v127, s4, v84, -v128
|
||||
; GCN-NEXT: v_exp_f32_e32 v84, v129
|
||||
@ -942,18 +949,19 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v199, v[150:151]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v200, v[152:153]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[142:143], v[0:15]
|
||||
; GCN-NEXT: v_cvt_f16_f32_e32 v132, v125
|
||||
; GCN-NEXT: v_exp_f32_e32 v130, v158
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v201, v[154:155]
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b64 v202, v[156:157]
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
; GCN-NEXT: v_add_u32_e32 v76, s20, v76
|
||||
; GCN-NEXT: v_and_b32_e32 v76, 0x1fffffff, v76
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v48, v[0:3]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dwordx4 v[32:35], v4, s[0:3], 0 offen offset:64 sc0 sc1
|
||||
@ -91,6 +92,7 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b128 v48, v[32:35]
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[36:37], v[40:41], v[16:31]
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
@ -138,12 +140,13 @@
|
||||
; GCN-NEXT: v_perm_b32 v71, v74, v72, s3
|
||||
; GCN-NEXT: v_perm_b32 v72, v75, v73, s2
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v76, v70
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v77, v71
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v78, v72
|
||||
; GCN-NEXT: v_mul_f32_e32 v74, s4, v20
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[68:69], v[64:65], v[0:15]
|
||||
@ -197,7 +200,7 @@
|
||||
; GCN-NEXT: ds_bpermute_b32 v65, v66, v64
|
||||
; GCN-NEXT: v_perm_b32 v68, v75, v73, s3
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v79, v68
|
||||
; GCN-NEXT: ; implicit-def: $vgpr84
|
||||
; GCN-NEXT: v_max_f32_e32 v65, v65, v65
|
||||
@ -310,6 +313,7 @@
|
||||
; GCN-NEXT: s_waitcnt vmcnt(8)
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v76, v31
|
||||
; GCN-NEXT: v_mul_f32_e32 v31, 0x3fb8aa3b, v67
|
||||
; GCN-NEXT: v_exp_f32_e32 v31, v31
|
||||
@ -317,13 +321,13 @@
|
||||
; GCN-NEXT: v_pack_b32_f16 v18, v19, v86
|
||||
; GCN-NEXT: v_pack_b32_f16 v19, v22, v89
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v77, v64
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v78, v90
|
||||
; GCN-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: ds_write_b32 v79, v65
|
||||
; GCN-NEXT: v_mul_f32_e32 v64, 0x3fb8aa3b, v73
|
||||
; GCN-NEXT: v_mul_f32_e32 v65, 0x3fb8aa3b, v87
|
||||
|
||||
57
llvm/test/CodeGen/AMDGPU/waitcnt-wbl2.ll
Normal file
57
llvm/test/CodeGen/AMDGPU/waitcnt-wbl2.ll
Normal file
@ -0,0 +1,57 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s
|
||||
|
||||
; Test that vmcnt(0) is correctly preserved between buffer_wbl2 and atomic
|
||||
; when there are global memory stores that need to be written back.
|
||||
|
||||
; Cross-block case: the global store is issued and explicitly waited on in
; %entry, while the agent-scope release atomic sits in the conditionally
; executed %do_atomic block. The GFX950 checks below expect an
; "s_waitcnt vmcnt(0)" directly after "buffer_wbl2 sc1" in %do_atomic, even
; though the store's own wait already appears in %entry.
define void @global_store_different_block(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr, i1 %cond) {
|
||||
; GFX950-LABEL: global_store_different_block:
|
||||
; GFX950: ; %bb.0: ; %entry
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_and_b32_e32 v4, 1, v4
|
||||
; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
|
||||
; GFX950-NEXT: v_mov_b32_e32 v4, 42
|
||||
; GFX950-NEXT: global_store_dword v[0:1], v4, off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX950-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX950-NEXT: ; %bb.1: ; %do_atomic
|
||||
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off
|
||||
; GFX950-NEXT: .LBB0_2: ; %exit
|
||||
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
; Global store in entry block
|
||||
store i32 42, ptr addrspace(1) %data_ptr, align 4
|
||||
; Explicit wait right after the store; the checks above show
; "s_waitcnt vmcnt(0) lgkmcnt(0)" at this point in %entry.
call void @llvm.amdgcn.s.waitcnt(i32 112)
|
||||
br i1 %cond, label %do_atomic, label %exit
|
||||
|
||||
do_atomic:
|
||||
; Agent-scope release exchange; the checks above show it preceded by
; "buffer_wbl2 sc1" and then "s_waitcnt vmcnt(0)".
%old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Single-block case: a global store immediately followed by an agent-scope
; release atomic, with no explicit waitcnt intrinsic in between. The GFX950
; checks below expect "buffer_wbl2 sc1" followed by "s_waitcnt vmcnt(0)"
; before the "global_atomic_swap_x2".
define void @global_store_then_atomic(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr) {
|
||||
; GFX950-LABEL: global_store_then_atomic:
|
||||
; GFX950: ; %bb.0: ; %entry
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX950-NEXT: v_mov_b32_e32 v4, 42
|
||||
; GFX950-NEXT: global_store_dword v[0:1], v4, off
|
||||
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
|
||||
; GFX950-NEXT: buffer_wbl2 sc1
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off
|
||||
; GFX950-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
; Store whose data the following release atomic is meant to publish.
store i32 42, ptr addrspace(1) %data_ptr, align 4
|
||||
; Agent-scope release exchange; its result %old is unused.
%old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release
|
||||
ret void
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user