Compare commits

...

4 Commits

Author SHA1 Message Date
pvanhout
857644e104 Comments 2025-08-22 10:14:27 +02:00
pvanhout
3b51b225ba Drop -CU suffix 2025-08-22 10:14:27 +02:00
pvanhout
e83355bd69 clang-format 2025-08-22 10:14:26 +02:00
pvanhout
c66127cb33 [AMDGPU][gfx1250] Implement SIMemoryLegalizer
Implements the base of the MemoryLegalizer for a roughly correct GFX1250 memory model.
Documentation will come later, and some remaining changes still have to be added, but this is the backbone of the model.
2025-08-22 10:14:26 +02:00
30 changed files with 1792 additions and 1011 deletions

View File

@ -1831,6 +1831,10 @@ public:
/// \returns true if the subtarget is affected by the scratch base forwarding
/// hazard, i.e. it has GFX1250 instructions and its generation is GFX12.
bool hasScratchBaseForwardingHazard() const {
return GFX1250Insts && getGeneration() == GFX12;
}
/// \returns true if the subtarget requires a wait for xcnt before atomic
/// flat/global stores and read-modify-write operations. This is currently the
/// case exactly when the subtarget has GFX1250 instructions.
bool requiresWaitXCntBeforeAtomicStores() const { return GFX1250Insts; }
};
class GCNUserSGPRUsageInfo {

View File

@ -1051,6 +1051,8 @@ public:
return AMDGPU::S_WAIT_DSCNT;
case AMDGPU::S_WAIT_KMCNT_soft:
return AMDGPU::S_WAIT_KMCNT;
case AMDGPU::S_WAIT_XCNT_soft:
return AMDGPU::S_WAIT_XCNT;
default:
return Opcode;
}

View File

@ -587,7 +587,11 @@ protected:
SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;
public:
SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}
/// Builds the GFX12 cache control on top of the GFX11 implementation.
///
/// \param ST subtarget this cache control is created for; it is forwarded to
/// the SIGfx11CacheControl base constructor.
SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
  // GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
  // the behavior is the same if assuming GFX12.0 in CU mode. The assert
  // below enforces that a subtarget with GFX1250 instructions has CU mode
  // enabled, and carries a message so a failure is self-explanatory.
  assert((!ST.hasGFX1250Insts() || ST.isCuModeEnabled()) &&
         "GFX12.5 memory model expects CU mode to be enabled");
}
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
@ -2340,12 +2344,16 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
STORECnt |= true;
break;
case SIAtomicScope::WORKGROUP:
// In WGP mode the waves of a work-group can be executing on either CU of
// the WGP. Therefore need to wait for operations to complete to ensure
// they are visible to waves in the other CU as the L0 is per CU.
// Otherwise in CU mode and all waves of a work-group are on the same CU
// which shares the same L0.
if (!ST.isCuModeEnabled()) {
// GFX12.0:
// In WGP mode the waves of a work-group can be executing on either CU
// of the WGP. Therefore need to wait for operations to complete to
// ensure they are visible to waves in the other CU as the L0 is per CU.
// Otherwise, in CU mode, all waves of a work-group are on the same CU,
// which shares the same L0.
//
// GFX12.5:
// TODO DOCS
if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
LOADCnt |= true;
if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@ -2397,7 +2405,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
//
// This also applies to fences. Fences cannot pair with an instruction
// tracked with bvh/samplecnt as we don't have any atomics that do that.
if (Order != AtomicOrdering::Acquire) {
if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
}
@ -2449,10 +2457,14 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
ScopeImm = AMDGPU::CPol::SCOPE_DEV;
break;
case SIAtomicScope::WORKGROUP:
// In WGP mode the waves of a work-group can be executing on either CU of
// the WGP. Therefore we need to invalidate the L0 which is per CU.
// Otherwise in CU mode all waves of a work-group are on the same CU, and so
// the L0 does not need to be invalidated.
// GFX12.0:
// In WGP mode the waves of a work-group can be executing on either CU of
// the WGP. Therefore we need to invalidate the L0 which is per CU.
// Otherwise in CU mode all waves of a work-group are on the same CU, and
// so the L0 does not need to be invalidated.
//
// GFX12.5:
// TODO DOCS
if (ST.isCuModeEnabled())
return false;
@ -2497,7 +2509,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
// global_wb is only necessary at system scope for gfx120x targets.
// global_wb is only necessary at system scope for GFX12.0;
// it is also necessary at device scope for GFX12.5.
//
// Emitting it for lower scopes is a slow no-op, so we omit it
// for performance.
@ -2507,6 +2520,12 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
.addImm(AMDGPU::CPol::SCOPE_SYS);
break;
case SIAtomicScope::AGENT:
// TODO DOCS
if (ST.hasGFX1250Insts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
.addImm(AMDGPU::CPol::SCOPE_DEV);
}
break;
case SIAtomicScope::WORKGROUP:
// No WB necessary, but we still have to wait.
break;
@ -2569,17 +2588,32 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
}
bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
if (!CPol)
return false;
assert(MI.mayStore() && "Not a Store inst");
const bool IsRMW = (MI.mayLoad() && MI.mayStore());
bool Changed = false;
// GFX12.5 only: an xcnt wait is needed before flat and global atomic
// stores and RMWs.
if (Atomic && ST.requiresWaitXCntBeforeAtomicStores() && TII->isFLAT(MI)) {
MachineBasicBlock &MBB = *MI.getParent();
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(S_WAIT_XCNT_soft)).addImm(0);
Changed = true;
}
// Remaining fixes do not apply to RMWs.
if (IsRMW)
return Changed;
MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
if (!CPol) // Some vmem operations do not have a scope and are not concerned.
return Changed;
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
// GFX12.0 only: Extra waits needed before system scope stores.
if (!ST.hasGFX1250Insts()) {
if (!Atomic && Scope == CPol::SCOPE_SYS)
return insertWaitsBeforeSystemScopeStore(MI);
return false;
return Changed;
}
// GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
@ -2589,7 +2623,7 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
(!ST.hasCUStores() || TII->mayAccessScratchThroughFlat(MI)))
return setScope(MI, CPol::SCOPE_SE);
return false;
return Changed;
}
bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
@ -2778,6 +2812,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
MachineInstr &RMWMI = *MI;
if (MOI.isAtomic()) {
const AtomicOrdering Order = MOI.getOrdering();
@ -2812,6 +2847,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
Position::AFTER);
}
Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
return Changed;
}

View File

@ -1656,6 +1656,11 @@ let OtherPredicates = [HasImageInsts] in {
def S_WAIT_KMCNT_soft : SOPP_Pseudo <"s_soft_wait_kmcnt", (ins s16imm:$simm16), "$simm16">;
}
// Soft xcnt wait pseudo, only available on subtargets with HasWaitXcnt.
// It takes a 16-bit immediate counter value. The mnemonic follows the
// "s_soft_wait_*" naming used by the other soft wait pseudos so the
// instruction is identifiable if it is ever printed.
let SubtargetPredicate = HasWaitXcnt in {
  def S_WAIT_XCNT_soft : SOPP_Pseudo<"s_soft_wait_xcnt", (ins s16imm:$simm16), "$simm16">;
}
// Represents the point at which a wave must wait for all outstanding direct loads to LDS.
// Typically inserted by the memory legalizer and consumed by SIInsertWaitcnts.

View File

@ -1501,6 +1501,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1571,6 +1572,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1645,6 +1649,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1715,6 +1720,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1792,6 +1800,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -1902,6 +1911,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1947,6 +1959,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -1987,6 +2000,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2031,6 +2047,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -2107,6 +2124,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2190,6 +2208,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2418,6 +2439,7 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

View File

@ -364,6 +364,7 @@ define i16 @global_one_as_atomic_min_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -406,6 +407,7 @@ define i16 @global_one_as_atomic_umin_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -448,6 +450,7 @@ define i16 @global_one_as_atomic_max_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -490,6 +493,7 @@ define i16 @global_one_as_atomic_umax_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -1344,6 +1348,7 @@ define i16 @flat_one_as_atomic_min_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -1386,6 +1391,7 @@ define i16 @flat_one_as_atomic_umin_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -1428,6 +1434,7 @@ define i16 @flat_one_as_atomic_max_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@ -1470,6 +1477,7 @@ define i16 @flat_one_as_atomic_umax_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7

File diff suppressed because it is too large Load Diff

View File

@ -1473,6 +1473,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1513,6 +1514,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1557,6 +1561,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1597,6 +1602,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1673,6 +1681,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -1765,6 +1774,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1809,6 +1821,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -1849,6 +1862,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -1893,6 +1909,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -1969,6 +1986,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -2063,6 +2081,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2136,6 +2157,7 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@ -2275,6 +2297,7 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@ -2307,6 +2330,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@ -2339,6 +2363,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]

View File

@ -82,6 +82,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -153,6 +155,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -229,6 +233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -305,6 +311,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -379,6 +387,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -450,6 +460,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -526,6 +538,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -602,6 +616,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
@ -787,8 +803,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -893,8 +908,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -1000,8 +1014,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -1190,8 +1203,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -1296,8 +1308,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -1403,8 +1414,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -1600,8 +1610,6 @@ define amdgpu_kernel void @system_release_fence() {
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -1713,8 +1721,6 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -1827,8 +1833,6 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -2024,8 +2028,6 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2137,8 +2139,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -2251,8 +2251,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS

View File

@ -1066,7 +1066,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
@ -1146,7 +1147,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
@ -1231,7 +1233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
@ -1316,7 +1319,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
@ -1391,6 +1395,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
@ -1462,6 +1468,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
@ -1538,6 +1546,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
@ -1614,6 +1624,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
@ -1799,8 +1811,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -1905,8 +1916,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2012,8 +2022,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2202,8 +2211,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2308,8 +2316,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2415,8 +2422,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2612,8 +2618,6 @@ define amdgpu_kernel void @system_release_fence() {
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2725,8 +2729,6 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -2839,8 +2841,6 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -3036,8 +3036,6 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@ -3149,8 +3147,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -3263,8 +3259,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS

View File

@ -830,14 +830,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -1000,6 +996,7 @@ define amdgpu_kernel void @flat_agent_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1159,6 +1156,7 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1342,9 +1340,9 @@ define amdgpu_kernel void @flat_agent_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -1529,9 +1527,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -1692,6 +1690,7 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1882,6 +1881,7 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2067,9 +2067,9 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -2285,9 +2285,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2505,9 +2505,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2729,6 +2729,7 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2979,13 +2980,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3235,13 +3234,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3496,6 +3493,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3779,6 +3777,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -4057,9 +4056,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -4368,9 +4367,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -4681,9 +4680,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -4970,6 +4969,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -5255,6 +5255,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -5564,9 +5565,9 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -5877,9 +5878,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6190,9 +6191,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6503,9 +6504,9 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6816,9 +6817,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7129,9 +7130,9 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7442,9 +7443,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7755,9 +7756,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -8057,6 +8058,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8372,6 +8374,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -8697,9 +8700,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -9044,13 +9047,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9394,13 +9395,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9720,9 +9719,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10038,6 +10036,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -10382,13 +10381,11 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10732,13 +10729,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11082,13 +11077,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11432,13 +11425,11 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11778,9 +11769,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -12126,13 +12117,11 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -12476,13 +12465,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -12826,13 +12813,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13684,14 +13669,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -13855,6 +13836,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14014,6 +13996,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14197,10 +14180,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14384,10 +14367,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14547,6 +14530,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -14733,6 +14717,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -14918,10 +14903,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -15132,10 +15117,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -15348,10 +15333,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -15582,6 +15567,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -15843,13 +15829,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -16110,13 +16094,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -16372,6 +16354,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16651,6 +16634,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -16929,10 +16913,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17236,10 +17220,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -17545,10 +17529,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -17830,6 +17814,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18111,6 +18096,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18416,10 +18402,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18725,10 +18711,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19034,10 +19020,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19343,10 +19329,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19652,10 +19638,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19961,10 +19947,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -20270,10 +20256,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -20579,10 +20565,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -20881,6 +20867,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21206,6 +21193,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -21532,10 +21520,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21889,13 +21877,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -22250,13 +22236,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -22587,9 +22571,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -22916,6 +22899,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -23271,13 +23255,11 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -23632,13 +23614,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -23993,13 +23973,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -24354,13 +24332,11 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -24711,10 +24687,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -25070,13 +25046,11 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -25431,13 +25405,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -25792,13 +25764,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0

View File

@ -115,8 +115,6 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm

View File

@ -1353,8 +1353,6 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm

View File

@ -936,6 +936,7 @@ define amdgpu_kernel void @flat_singlethread_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1095,6 +1096,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1254,6 +1256,7 @@ define amdgpu_kernel void @flat_singlethread_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1413,6 +1416,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1572,6 +1576,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1731,6 +1736,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1890,6 +1896,7 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2049,6 +2056,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2208,6 +2216,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2411,6 +2420,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -2617,6 +2627,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -2823,6 +2834,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3077,6 +3089,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3329,6 +3342,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3581,6 +3595,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3833,6 +3848,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4085,6 +4101,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4337,6 +4354,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4589,6 +4607,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4841,6 +4860,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5093,6 +5113,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5345,6 +5366,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5597,6 +5619,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5849,6 +5872,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6101,6 +6125,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6353,6 +6378,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6605,6 +6631,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6901,6 +6928,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7201,6 +7229,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7501,6 +7530,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7801,6 +7831,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8101,6 +8132,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8401,6 +8433,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8701,6 +8734,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9001,6 +9035,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9301,6 +9336,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9601,6 +9637,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9901,6 +9938,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10201,6 +10239,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10501,6 +10540,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10801,6 +10841,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11101,6 +11142,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -12037,6 +12079,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12196,6 +12239,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12355,6 +12399,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12514,6 +12559,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12673,6 +12719,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -12832,6 +12879,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -12991,6 +13039,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13150,6 +13199,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13309,6 +13359,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13512,6 +13563,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13718,6 +13770,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13924,6 +13977,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -14178,6 +14232,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14430,6 +14485,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14682,6 +14738,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14934,6 +14991,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15186,6 +15244,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15438,6 +15497,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15690,6 +15750,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15942,6 +16003,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16194,6 +16256,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16446,6 +16509,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16698,6 +16762,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16950,6 +17015,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17202,6 +17268,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17454,6 +17521,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17706,6 +17774,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -18002,6 +18071,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18302,6 +18372,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18602,6 +18673,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18902,6 +18974,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19202,6 +19275,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19502,6 +19576,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19802,6 +19877,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20102,6 +20178,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20402,6 +20479,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20702,6 +20780,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21002,6 +21081,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21302,6 +21382,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21602,6 +21683,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21902,6 +21984,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -22202,6 +22285,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE

View File

@ -834,14 +834,10 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -1004,6 +1000,7 @@ define amdgpu_kernel void @flat_system_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1163,6 +1160,7 @@ define amdgpu_kernel void @flat_system_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1351,9 +1349,8 @@ define amdgpu_kernel void @flat_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -1543,9 +1540,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -1706,6 +1702,7 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1898,6 +1895,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -2088,9 +2086,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -2313,9 +2310,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2540,9 +2536,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -2766,6 +2761,7 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -3023,13 +3019,10 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3286,13 +3279,10 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3547,6 +3537,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3832,6 +3823,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -4115,9 +4107,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -4433,9 +4424,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -4753,9 +4743,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -5044,6 +5033,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -5331,6 +5321,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -5647,9 +5638,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -5967,9 +5957,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6287,9 +6276,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6607,9 +6595,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -6927,9 +6914,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7247,9 +7233,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7567,9 +7552,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -7887,9 +7871,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@ -8189,6 +8172,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8506,6 +8490,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -8836,9 +8821,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -9190,13 +9174,10 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9547,13 +9528,10 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9875,9 +9853,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10195,6 +10172,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -10546,13 +10524,10 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10903,13 +10878,10 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11260,13 +11232,10 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11617,13 +11586,10 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11970,9 +11936,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -12325,13 +12290,10 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -12682,13 +12644,10 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13039,13 +12998,10 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13901,14 +13857,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -14072,6 +14024,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14231,6 +14184,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14419,10 +14373,9 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14611,10 +14564,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -14774,6 +14726,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -14962,6 +14915,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -15152,10 +15106,9 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -15373,10 +15326,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -15596,10 +15548,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -15832,6 +15783,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -16100,13 +16052,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -16374,13 +16323,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -16636,6 +16582,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16917,6 +16864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -17200,10 +17148,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17514,10 +17461,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -17830,10 +17776,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18117,6 +18062,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18400,6 +18346,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18712,10 +18659,9 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -19028,10 +18974,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -19344,10 +19289,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -19660,10 +19604,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -19976,10 +19919,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -20292,10 +20234,9 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -20608,10 +20549,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -20924,10 +20864,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -21226,6 +21165,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21553,6 +21493,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -21884,10 +21825,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -22248,13 +22188,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -22616,13 +22553,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -22955,9 +22889,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -23286,6 +23219,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -23648,13 +23582,10 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -24016,13 +23947,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -24384,13 +24312,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -24752,13 +24677,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -25116,10 +25038,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -25482,13 +25403,10 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -25850,13 +25768,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -26218,13 +26133,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0

View File

@ -152,8 +152,6 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
@ -441,8 +439,6 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
@ -1167,7 +1163,9 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {

View File

@ -936,6 +936,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1095,6 +1096,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1254,6 +1256,7 @@ define amdgpu_kernel void @flat_wavefront_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1413,6 +1416,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1572,6 +1576,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1731,6 +1736,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1890,6 +1896,7 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2049,6 +2056,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2208,6 +2216,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2411,6 +2420,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -2617,6 +2627,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -2823,6 +2834,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3077,6 +3089,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3329,6 +3342,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3581,6 +3595,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3833,6 +3848,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4085,6 +4101,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4337,6 +4354,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4589,6 +4607,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4841,6 +4860,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5093,6 +5113,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5345,6 +5366,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5597,6 +5619,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -5849,6 +5872,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6101,6 +6125,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6353,6 +6378,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6605,6 +6631,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -6901,6 +6928,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7201,6 +7229,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7501,6 +7530,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7801,6 +7831,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8101,6 +8132,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8401,6 +8433,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8701,6 +8734,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9001,6 +9035,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9301,6 +9336,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9601,6 +9637,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9901,6 +9938,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10201,6 +10239,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10501,6 +10540,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10801,6 +10841,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11101,6 +11142,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -12037,6 +12079,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12196,6 +12239,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12355,6 +12399,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12514,6 +12559,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12673,6 +12719,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -12832,6 +12879,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -12991,6 +13039,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13150,6 +13199,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13309,6 +13359,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13512,6 +13563,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13718,6 +13770,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13924,6 +13977,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -14178,6 +14232,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14430,6 +14485,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14682,6 +14738,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14934,6 +14991,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15186,6 +15244,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15438,6 +15497,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15690,6 +15750,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15942,6 +16003,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16194,6 +16256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16446,6 +16509,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16698,6 +16762,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -16950,6 +17015,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17202,6 +17268,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17454,6 +17521,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -17706,6 +17774,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -18002,6 +18071,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18302,6 +18372,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18602,6 +18673,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18902,6 +18974,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19202,6 +19275,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19502,6 +19576,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19802,6 +19877,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20102,6 +20178,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20402,6 +20479,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20702,6 +20780,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21002,6 +21081,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21302,6 +21382,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21602,6 +21683,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21902,6 +21984,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE

View File

@ -816,7 +816,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -980,6 +981,7 @@ define amdgpu_kernel void @flat_workgroup_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1139,6 +1141,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1315,7 +1318,9 @@ define amdgpu_kernel void @flat_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1492,7 +1497,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -1652,6 +1659,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -1830,8 +1838,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -2007,7 +2016,9 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -2203,9 +2214,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -2400,9 +2413,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -2617,6 +2632,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -2854,7 +2870,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3092,7 +3110,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -3347,6 +3367,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -3618,8 +3639,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -3888,7 +3910,9 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -4177,9 +4201,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -4467,9 +4493,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -4740,8 +4768,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -5012,8 +5041,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -5301,9 +5331,11 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -5591,9 +5623,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -5881,9 +5915,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -6171,9 +6207,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -6469,6 +6507,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -6781,6 +6820,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7098,7 +7138,9 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7430,7 +7472,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -7762,7 +7806,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8077,6 +8123,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8389,6 +8436,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -8720,7 +8768,9 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9052,7 +9102,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9384,7 +9436,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -9716,7 +9770,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10046,7 +10102,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10378,7 +10436,9 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -10710,7 +10770,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11042,7 +11104,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -11844,6 +11908,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@ -12007,6 +12073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12166,6 +12233,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12335,6 +12403,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12504,6 +12575,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@ -12663,6 +12737,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -12832,7 +12907,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -13001,6 +13078,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@ -13180,7 +13260,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -13359,7 +13443,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@ -13570,6 +13658,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -13796,6 +13885,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -14022,6 +14114,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -14276,6 +14371,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -14538,7 +14634,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -14800,6 +14898,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@ -15072,7 +15173,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -15344,7 +15449,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -15606,7 +15715,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -15868,7 +15979,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -16140,7 +16253,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -16412,7 +16529,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -16684,7 +16805,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -16956,7 +17081,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -17228,7 +17357,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -17500,7 +17633,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -17772,7 +17909,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -18044,7 +18185,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@ -18340,6 +18485,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18648,6 +18794,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -18958,6 +19105,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19278,6 +19428,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19598,6 +19751,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -19908,6 +20064,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20216,6 +20373,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20536,6 +20694,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -20856,6 +21017,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21176,6 +21340,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21496,6 +21663,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -21814,6 +21984,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -22134,6 +22307,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -22454,6 +22630,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@ -22774,6 +22953,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE

View File

@ -834,14 +834,10 @@ define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -1011,6 +1007,7 @@ define amdgpu_kernel void @global_agent_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1177,6 +1174,7 @@ define amdgpu_kernel void @global_agent_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1368,9 +1366,9 @@ define amdgpu_kernel void @global_agent_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -1563,9 +1561,9 @@ define amdgpu_kernel void @global_agent_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -1731,6 +1729,7 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1924,6 +1923,7 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -2115,9 +2115,9 @@ define amdgpu_kernel void @global_agent_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -2337,9 +2337,9 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -2561,9 +2561,9 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -2775,6 +2775,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -3016,13 +3017,11 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3263,13 +3262,11 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3505,6 +3502,7 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3767,6 +3765,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -4027,9 +4026,9 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@ -4318,9 +4317,9 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -4611,9 +4610,9 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -4879,6 +4878,7 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -5143,6 +5143,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -5432,9 +5433,9 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -5725,9 +5726,9 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6018,9 +6019,9 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6311,9 +6312,9 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6604,9 +6605,9 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6897,9 +6898,9 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -7190,9 +7191,9 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -7483,9 +7484,9 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -7752,6 +7753,7 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8035,6 +8037,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -8328,9 +8331,9 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -8644,13 +8647,11 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8963,13 +8964,11 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9257,9 +9256,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9543,6 +9541,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -9856,13 +9855,11 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10175,13 +10172,11 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10494,13 +10489,11 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10813,13 +10806,11 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11128,9 +11119,9 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -11445,13 +11436,11 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11764,13 +11753,11 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -12083,13 +12070,11 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -12923,14 +12908,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13100,6 +13081,7 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -13266,6 +13248,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -13457,10 +13440,10 @@ define amdgpu_kernel void @global_agent_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -13652,10 +13635,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -13820,6 +13803,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -14013,6 +13997,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -14204,10 +14189,10 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -14426,10 +14411,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -14650,10 +14635,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -14864,6 +14849,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -15105,13 +15091,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -15352,13 +15336,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -15594,6 +15576,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15856,6 +15839,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -16116,10 +16100,10 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16407,10 +16391,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -16700,10 +16684,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -16968,6 +16952,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -17232,6 +17217,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -17521,10 +17507,10 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -17814,10 +17800,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18107,10 +18093,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18400,10 +18386,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18693,10 +18679,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -18986,10 +18972,10 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19279,10 +19265,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19572,10 +19558,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -19841,6 +19827,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20124,6 +20111,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -20437,13 +20425,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20756,13 +20742,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21050,9 +21034,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21336,6 +21319,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -21649,13 +21633,11 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21968,13 +21950,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22287,13 +22267,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22606,13 +22584,11 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22921,10 +22897,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@ -23238,13 +23214,11 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -23557,13 +23531,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -23876,13 +23848,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]

View File

@ -94,8 +94,6 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm

View File

@ -1118,8 +1118,6 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm

View File

@ -952,6 +952,7 @@ define amdgpu_kernel void @global_singlethread_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1118,6 +1119,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1284,6 +1286,7 @@ define amdgpu_kernel void @global_singlethread_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1450,6 +1453,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1614,6 +1618,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1778,6 +1783,7 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1942,6 +1948,7 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2106,6 +2113,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2270,6 +2278,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2462,6 +2471,7 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -2657,6 +2667,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -2852,6 +2863,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3087,6 +3099,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3320,6 +3333,7 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3553,6 +3567,7 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3786,6 +3801,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4019,6 +4035,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4252,6 +4269,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4485,6 +4503,7 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4718,6 +4737,7 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4951,6 +4971,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5184,6 +5205,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5417,6 +5439,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5650,6 +5673,7 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5883,6 +5907,7 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6116,6 +6141,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6349,6 +6375,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6612,6 +6639,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -6879,6 +6907,7 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7146,6 +7175,7 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7413,6 +7443,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7680,6 +7711,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7947,6 +7979,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8214,6 +8247,7 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8481,6 +8515,7 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8748,6 +8783,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9015,6 +9051,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9282,6 +9319,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9549,6 +9587,7 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9816,6 +9855,7 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10083,6 +10123,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10350,6 +10391,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11301,6 +11343,7 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11467,6 +11510,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11633,6 +11677,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11799,6 +11844,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11963,6 +12009,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12127,6 +12174,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12291,6 +12339,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12455,6 +12504,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12619,6 +12669,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12811,6 +12862,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13006,6 +13058,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13201,6 +13254,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13436,6 +13490,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -13669,6 +13724,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -13902,6 +13958,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14135,6 +14192,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14368,6 +14426,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14601,6 +14660,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14834,6 +14894,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15067,6 +15128,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15300,6 +15362,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15533,6 +15596,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15766,6 +15830,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15999,6 +16064,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16232,6 +16298,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16465,6 +16532,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16698,6 +16766,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16961,6 +17030,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17228,6 +17298,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17495,6 +17566,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17762,6 +17834,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18029,6 +18102,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18296,6 +18370,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18563,6 +18638,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18830,6 +18906,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19097,6 +19174,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19364,6 +19442,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19631,6 +19710,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19898,6 +19978,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20165,6 +20246,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20432,6 +20514,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20699,6 +20782,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]

View File

@ -838,14 +838,10 @@ define amdgpu_kernel void @global_system_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -1015,6 +1011,7 @@ define amdgpu_kernel void @global_system_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1181,6 +1178,7 @@ define amdgpu_kernel void @global_system_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1377,9 +1375,8 @@ define amdgpu_kernel void @global_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -1577,9 +1574,8 @@ define amdgpu_kernel void @global_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -1745,6 +1741,7 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1940,6 +1937,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -2136,9 +2134,8 @@ define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -2365,9 +2362,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -2596,9 +2592,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -2812,6 +2807,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -3060,13 +3056,10 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3314,13 +3307,10 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3556,6 +3546,7 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3820,6 +3811,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -4085,9 +4077,8 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@ -4383,9 +4374,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -4683,9 +4673,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -4953,6 +4942,7 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -5219,6 +5209,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -5515,9 +5506,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -5815,9 +5805,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6115,9 +6104,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6415,9 +6403,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@ -6684,6 +6671,7 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -6969,6 +6957,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -7289,13 +7278,10 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7615,13 +7601,10 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7911,9 +7894,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8199,6 +8181,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -8519,13 +8502,10 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8845,13 +8825,10 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9171,13 +9148,10 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9497,13 +9471,10 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9819,9 +9790,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -10143,13 +10113,10 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10469,13 +10436,10 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10795,13 +10759,10 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11639,14 +11600,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11816,6 +11773,7 @@ define amdgpu_kernel void @global_system_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11982,6 +11940,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12178,10 +12137,9 @@ define amdgpu_kernel void @global_system_one_as_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12378,10 +12336,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12546,6 +12503,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12741,6 +12699,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -12937,10 +12896,9 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -13166,10 +13124,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -13397,10 +13354,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -13613,6 +13569,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -13861,13 +13818,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -14115,13 +14069,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -14357,6 +14308,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14621,6 +14573,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -14886,10 +14839,9 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15184,10 +15136,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -15484,10 +15435,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -15754,6 +15704,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -16020,6 +15971,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -16316,10 +16268,9 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -16616,10 +16567,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -16916,10 +16866,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -17216,10 +17165,9 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -17516,10 +17464,9 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -17816,10 +17763,9 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18116,10 +18062,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18416,10 +18361,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -18685,6 +18629,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18970,6 +18915,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -19268,10 +19214,9 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19591,13 +19536,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19917,13 +19859,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20213,9 +20152,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20501,6 +20439,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -20821,13 +20760,10 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21147,13 +21083,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21473,13 +21406,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21799,13 +21729,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22121,10 +22048,9 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@ -22445,13 +22371,10 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22771,13 +22694,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -23097,13 +23017,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]

View File

@ -155,8 +155,6 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
@ -371,8 +369,6 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
@ -1047,7 +1043,9 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {

View File

@ -952,6 +952,7 @@ define amdgpu_kernel void @global_wavefront_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1118,6 +1119,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1284,6 +1286,7 @@ define amdgpu_kernel void @global_wavefront_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1450,6 +1453,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1614,6 +1618,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1778,6 +1783,7 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1942,6 +1948,7 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2106,6 +2113,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2270,6 +2278,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2462,6 +2471,7 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -2657,6 +2667,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -2852,6 +2863,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3087,6 +3099,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3320,6 +3333,7 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3553,6 +3567,7 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3786,6 +3801,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4019,6 +4035,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4252,6 +4269,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4485,6 +4503,7 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4718,6 +4737,7 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4951,6 +4971,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5184,6 +5205,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5417,6 +5439,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5650,6 +5673,7 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -5883,6 +5907,7 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6116,6 +6141,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6349,6 +6375,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -6612,6 +6639,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -6879,6 +6907,7 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7146,6 +7175,7 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7413,6 +7443,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7680,6 +7711,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7947,6 +7979,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8214,6 +8247,7 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8481,6 +8515,7 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8748,6 +8783,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9015,6 +9051,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9282,6 +9319,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9549,6 +9587,7 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9816,6 +9855,7 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10083,6 +10123,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10350,6 +10391,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11301,6 +11343,7 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11467,6 +11510,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11633,6 +11677,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11799,6 +11844,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -11963,6 +12009,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12127,6 +12174,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12291,6 +12339,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12455,6 +12504,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12619,6 +12669,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -12811,6 +12862,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13006,6 +13058,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13201,6 +13254,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13436,6 +13490,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -13669,6 +13724,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -13902,6 +13958,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14135,6 +14192,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14368,6 +14426,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14601,6 +14660,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14834,6 +14894,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15067,6 +15128,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15300,6 +15362,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15533,6 +15596,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15766,6 +15830,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15999,6 +16064,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16232,6 +16298,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16465,6 +16532,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16698,6 +16766,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -16961,6 +17030,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17228,6 +17298,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17495,6 +17566,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -17762,6 +17834,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18029,6 +18102,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18296,6 +18370,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18563,6 +18638,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18830,6 +18906,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19097,6 +19174,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19364,6 +19442,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19631,6 +19710,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19898,6 +19978,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20165,6 +20246,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20432,6 +20514,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20699,6 +20782,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]

View File

@ -804,7 +804,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -975,6 +976,7 @@ define amdgpu_kernel void @global_workgroup_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1141,6 +1143,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1325,7 +1328,9 @@ define amdgpu_kernel void @global_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1510,7 +1515,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -1675,6 +1682,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -1849,7 +1857,9 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -2031,7 +2041,9 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -2224,8 +2236,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -2417,8 +2432,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -2615,6 +2633,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -2835,7 +2854,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3056,7 +3077,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -3292,6 +3315,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -3535,7 +3559,9 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -3786,7 +3812,9 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -4048,8 +4076,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -4310,8 +4341,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -4554,7 +4588,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -4797,7 +4833,9 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -5058,8 +5096,11 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -5320,8 +5361,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -5582,8 +5626,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -5844,8 +5891,11 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -6106,8 +6156,11 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -6368,8 +6421,11 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -6630,8 +6686,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -6892,8 +6951,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -7156,6 +7218,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7428,6 +7491,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -7713,7 +7777,9 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8006,7 +8072,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8299,7 +8367,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8574,6 +8644,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -8846,6 +8917,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9138,7 +9210,9 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9431,7 +9505,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -9724,7 +9800,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10017,7 +10095,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10308,7 +10388,9 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10601,7 +10683,9 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -10894,7 +10978,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11187,7 +11273,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -11987,6 +12075,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
@ -12157,6 +12247,7 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12323,6 +12414,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12499,6 +12591,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12675,6 +12770,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@ -12839,6 +12937,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -13013,7 +13112,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -13187,6 +13288,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@ -13371,7 +13475,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -13555,7 +13663,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@ -13752,6 +13864,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -13964,6 +14077,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -14176,6 +14292,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -14411,6 +14530,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -14654,7 +14774,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -14897,6 +15019,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@ -15150,7 +15275,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -15403,7 +15532,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -15646,7 +15779,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -15889,7 +16024,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -16142,7 +16279,11 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -16395,7 +16536,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -16648,7 +16793,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -16901,7 +17050,11 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -17154,7 +17307,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -17407,7 +17564,11 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -17660,7 +17821,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -17913,7 +18078,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@ -18176,6 +18345,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18448,6 +18618,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -18725,6 +18896,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19009,6 +19183,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19293,6 +19470,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19567,6 +19747,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -19839,6 +20020,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20123,6 +20305,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20407,6 +20592,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20691,6 +20879,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -20975,6 +21166,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21257,6 +21451,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21541,6 +21738,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -21825,6 +22025,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@ -22109,6 +22312,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]

View File

@ -762,7 +762,8 @@ define amdgpu_kernel void @local_agent_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_agent_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0

View File

@ -762,7 +762,8 @@ define amdgpu_kernel void @local_system_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0

View File

@ -890,7 +890,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {

View File

@ -762,7 +762,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_workgroup_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0