[AMDGPU] Fix codegen to emit COPY instead of S_MOV_B64 for aperture regs (#158754)

This commit is contained in:
Stanislav Mekhanoshin 2025-09-16 02:26:32 -07:00 committed by GitHub
parent b9f84bce67
commit 76efbc068a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 1128 additions and 1175 deletions

View File

@ -2293,16 +2293,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
assert((ApertureRegNo != AMDGPU::SRC_PRIVATE_BASE ||
!ST.hasGloballyAddressableScratch()) &&
"Cannot use src_private_base with globally addressable scratch!");
// FIXME: It would be more natural to emit a COPY here, but then copy
// coalescing would kick in and it would think it's okay to use the "HI"
// subregister (instead of extracting the HI 32 bits) which is an artificial
// (unusable) register.
// Register TableGen definitions would need an overhaul to get rid of the
// artificial "HI" aperture registers and prevent this kind of issue from
// happening.
Register Dst = MRI.createGenericVirtualRegister(S64);
MRI.setRegClass(Dst, &AMDGPU::SReg_64RegClass);
B.buildInstr(AMDGPU::S_MOV_B64, {Dst}, {Register(ApertureRegNo)});
B.buildCopy({Dst}, {Register(ApertureRegNo)});
return B.buildUnmerge(S32, Dst).getReg(1);
}

View File

@ -8159,25 +8159,14 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// it returns a wrong value (all zeroes?). The real value is in the upper 32
// bits.
//
// To work around the issue, directly emit a 64 bit mov from this register
// To work around the issue, emit a 64 bit copy from this register
// then extract the high bits. Note that this shouldn't even result in a
// shift being emitted and simply become a pair of registers (e.g.):
// s_mov_b64 s[6:7], src_shared_base
// v_mov_b32_e32 v1, s7
//
// FIXME: It would be more natural to emit a CopyFromReg here, but then copy
// coalescing would kick in and it would think it's okay to use the "HI"
// subregister directly (instead of extracting the HI 32 bits) which is an
// artificial (unusable) register.
// Register TableGen definitions would need an overhaul to get rid of the
// artificial "HI" aperture registers and prevent this kind of issue from
// happening.
SDNode *Mov = DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64,
DAG.getRegister(ApertureRegNo, MVT::i64));
return DAG.getNode(
ISD::TRUNCATE, DL, MVT::i32,
DAG.getNode(ISD::SRL, DL, MVT::i64,
{SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
SDValue Copy =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ApertureRegNo, MVT::v2i32);
return DAG.getExtractVectorElt(DL, MVT::i32, Copy, 1);
}
// For code object version 5, private_base and shared_base are passed through

View File

@ -9,12 +9,11 @@
define amdgpu_ps void @amdgpu_ps() {
; MESA-LABEL: amdgpu_ps:
; MESA: ; %bb.0:
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; MESA-NEXT: s_mov_b64 s[0:1], src_private_base
; MESA-NEXT: s_mov_b32 s0, 0
; MESA-NEXT: s_mov_b64 s[2:3], src_private_base
; MESA-NEXT: s_mov_b32 s1, s3
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
; MESA-NEXT: v_mov_b32_e32 v0, s0
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; MESA-NEXT: v_mov_b32_e32 v2, 0
; MESA-NEXT: v_mov_b32_e32 v1, s1
; MESA-NEXT: flat_store_dword v[0:1], v2
@ -30,11 +29,10 @@ define amdgpu_ps void @amdgpu_ps() {
; PAL-NEXT: s_waitcnt lgkmcnt(0)
; PAL-NEXT: s_and_b32 s3, s3, 0xffff
; PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; PAL-NEXT: s_mov_b64 s[0:1], src_private_base
; PAL-NEXT: s_mov_b32 s0, 0
; PAL-NEXT: s_mov_b64 s[2:3], src_private_base
; PAL-NEXT: s_mov_b32 s1, s3
; PAL-NEXT: v_mov_b32_e32 v0, s0
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; PAL-NEXT: v_mov_b32_e32 v1, s1
; PAL-NEXT: flat_store_dword v[0:1], v2
; PAL-NEXT: s_waitcnt vmcnt(0)

View File

@ -65,52 +65,52 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_mov_b32 s2, s0
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_mov_b32 s0, s4
; GFX9V4-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
; GFX9V4-NEXT: s_mov_b32 s2, s5
; GFX9V4-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V4-NEXT: s_mov_b32 s4, s1
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: v_mov_b32_e32 v2, 2
; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_mov_b32 s2, s0
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_mov_b32 s0, s4
; GFX9V5-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
; GFX9V5-NEXT: s_mov_b32 s2, s5
; GFX9V5-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V5-NEXT: s_mov_b32 s4, s1
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: v_mov_b32_e32 v2, 2
; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
@ -150,10 +150,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@ -162,10 +162,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@ -206,10 +206,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@ -218,10 +218,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off

View File

@ -158,8 +158,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
@ -227,8 +227,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
@ -380,16 +380,16 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV3]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; GFX9-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_1]](s64)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[UV5]](s32)
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]]
@ -517,8 +517,8 @@ body: |
; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
%0:_(p5) = G_FRAME_INDEX %stack.0

View File

@ -361,8 +361,8 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__av(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -417,8 +417,8 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__v(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -473,8 +473,8 @@ define void @flat_atomic_cmpxchg_i64_ret_av_av__a(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -538,13 +538,13 @@ define void @flat_atomic_cmpxchg_i64_ret_a_a__a(ptr %ptr) #0 {
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
@ -603,13 +603,13 @@ define void @flat_atomic_cmpxchg_i64_ret_a_a__v(ptr %ptr) #0 {
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
@ -659,12 +659,12 @@ define void @flat_atomic_cmpxchg_i64_ret_v_a__v(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
@ -717,12 +717,12 @@ define void @flat_atomic_cmpxchg_i64_ret_a_v__v(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
@ -775,8 +775,8 @@ define void @flat_atomic_cmpxchg_i64_ret_v_v__a(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -836,8 +836,8 @@ define void @flat_atomic_cmpxchg_i64_ret_av_v__av(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -892,8 +892,8 @@ define void @flat_atomic_cmpxchg_i64_ret_v_av__av(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[2:3]
@ -948,12 +948,12 @@ define void @flat_atomic_cmpxchg_i64_ret_av_a__av(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART
@ -1006,12 +1006,12 @@ define void @flat_atomic_cmpxchg_i64_ret_a_av__av(ptr %ptr) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; CHECK-NEXT: ;;#ASMSTART

File diff suppressed because it is too large Load Diff

View File

@ -8,8 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
; CI-DAG: s_cselect_b32 s[[SHI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[SLO:[0-9]+]], [[PTR]], 0
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
@ -17,10 +17,13 @@ target triple = "amdgcn-amd-amdhsa"
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9: s_cmp_lg_u32 [[PTR]], -1
; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
; GFX9-DAG: s_cselect_b32 s[[SHI:[0-9]+]], s[[HIBASE]], 0
; GFX9-DAG: s_cselect_b32 s[[SLO:[0-9]+]], [[PTR]], 0
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; HSA-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; HSA-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; HSA: flat_store_dword v[[[VLO]]:[[VHI]]], [[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
@ -68,18 +71,21 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
; CI-DAG: s_cselect_b32 s[[SHI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[SLO:[0-9]+]], [[PTR]], 0
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9: s_cmp_lg_u32 [[PTR]], -1
; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
; GFX9: s_cselect_b32 s[[SHI:[0-9]+]], s[[HIBASE]], 0
; GFX9: s_cselect_b32 s[[SLO:[0-9]+]], [[PTR]], 0
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; HSA-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; HSA-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; HSA: flat_store_dword v[[[VLO]]:[[VHI]]], [[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0

View File

@ -456,12 +456,10 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: s_mov_b32 s6, 32
; GFX908-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX908-NEXT: s_getpc_b64 s[6:7]
; GFX908-NEXT: s_add_u32 s6, s6, global@rel32@lo+4
; GFX908-NEXT: s_addc_u32 s7, s7, global@rel32@hi+12
; GFX908-NEXT: s_cmp_eq_u32 s7, s4
; GFX908-NEXT: s_cmp_eq_u32 s7, s5
; GFX908-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX908-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX908-NEXT: s_mov_b64 s[4:5], -1
@ -507,12 +505,10 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: s_mov_b32 s6, 32
; GFX90A-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX90A-NEXT: s_getpc_b64 s[6:7]
; GFX90A-NEXT: s_add_u32 s6, s6, global@rel32@lo+4
; GFX90A-NEXT: s_addc_u32 s7, s7, global@rel32@hi+12
; GFX90A-NEXT: s_cmp_eq_u32 s7, s4
; GFX90A-NEXT: s_cmp_eq_u32 s7, s5
; GFX90A-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX90A-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX90A-NEXT: s_mov_b64 s[4:5], -1
@ -558,12 +554,10 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: s_mov_b32 s2, 32
; GFX942-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX942-NEXT: s_getpc_b64 s[2:3]
; GFX942-NEXT: s_add_u32 s2, s2, global@rel32@lo+4
; GFX942-NEXT: s_addc_u32 s3, s3, global@rel32@hi+12
; GFX942-NEXT: s_cmp_eq_u32 s3, s0
; GFX942-NEXT: s_cmp_eq_u32 s3, s1
; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX942-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX942-NEXT: s_mov_b64 s[0:1], -1
@ -607,12 +601,10 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX1100-NEXT: s_mov_b32 s2, 32
; GFX1100-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX1100-NEXT: s_getpc_b64 s[2:3]
; GFX1100-NEXT: s_add_u32 s2, s2, global@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s3, s3, global@rel32@hi+12
; GFX1100-NEXT: s_cmp_eq_u32 s3, s0
; GFX1100-NEXT: s_cmp_eq_u32 s3, s1
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX1100-NEXT: s_mov_b32 s0, -1
@ -660,9 +652,6 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX1200-NEXT: s_mov_b32 s2, 32
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX1200-NEXT: s_getpc_b64 s[2:3]
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_sext_i32_i16 s3, s3
@ -670,7 +659,7 @@ define i64 @optnone_atomicrmw_add_i64_expand(i64 %val) #1 {
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_ci_u32 s3, s3, global@rel32@hi+24
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_eq_u32 s3, s0
; GFX1200-NEXT: s_cmp_eq_u32 s3, s1
; GFX1200-NEXT: s_cselect_b32 s0, -1, 0
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
@ -731,12 +720,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: s_mov_b32 s6, 32
; GFX908-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX908-NEXT: s_getpc_b64 s[6:7]
; GFX908-NEXT: s_add_u32 s6, s6, global@rel32@lo+4
; GFX908-NEXT: s_addc_u32 s7, s7, global@rel32@hi+12
; GFX908-NEXT: s_cmp_eq_u32 s7, s4
; GFX908-NEXT: s_cmp_eq_u32 s7, s5
; GFX908-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX908-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX908-NEXT: s_mov_b64 s[4:5], -1
@ -800,12 +787,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: s_mov_b32 s6, 32
; GFX90A-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX90A-NEXT: s_getpc_b64 s[6:7]
; GFX90A-NEXT: s_add_u32 s6, s6, global@rel32@lo+4
; GFX90A-NEXT: s_addc_u32 s7, s7, global@rel32@hi+12
; GFX90A-NEXT: s_cmp_eq_u32 s7, s4
; GFX90A-NEXT: s_cmp_eq_u32 s7, s5
; GFX90A-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX90A-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX90A-NEXT: s_mov_b64 s[4:5], -1
@ -825,12 +810,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX90A-NEXT: s_branch .LBB5_10
; GFX90A-NEXT: .LBB5_3: ; %atomicrmw.check.private
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: s_mov_b32 s6, 32
; GFX90A-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX90A-NEXT: s_getpc_b64 s[6:7]
; GFX90A-NEXT: s_add_u32 s6, s6, global@rel32@lo+4
; GFX90A-NEXT: s_addc_u32 s7, s7, global@rel32@hi+12
; GFX90A-NEXT: s_cmp_eq_u32 s7, s4
; GFX90A-NEXT: s_cmp_eq_u32 s7, s5
; GFX90A-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX90A-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX90A-NEXT: s_mov_b64 s[4:5], -1
@ -896,12 +879,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-NEXT: s_mov_b32 s2, 32
; GFX942-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX942-NEXT: s_getpc_b64 s[2:3]
; GFX942-NEXT: s_add_u32 s2, s2, global@rel32@lo+4
; GFX942-NEXT: s_addc_u32 s3, s3, global@rel32@hi+12
; GFX942-NEXT: s_cmp_eq_u32 s3, s0
; GFX942-NEXT: s_cmp_eq_u32 s3, s1
; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX942-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX942-NEXT: s_mov_b64 s[0:1], -1
@ -921,12 +902,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX942-NEXT: s_branch .LBB5_10
; GFX942-NEXT: .LBB5_3: ; %atomicrmw.check.private
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: s_mov_b32 s2, 32
; GFX942-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX942-NEXT: s_getpc_b64 s[2:3]
; GFX942-NEXT: s_add_u32 s2, s2, global@rel32@lo+4
; GFX942-NEXT: s_addc_u32 s3, s3, global@rel32@hi+12
; GFX942-NEXT: s_cmp_eq_u32 s3, s0
; GFX942-NEXT: s_cmp_eq_u32 s3, s1
; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX942-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX942-NEXT: s_mov_b64 s[0:1], -1
@ -990,12 +969,10 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX1100-NEXT: s_mov_b32 s2, 32
; GFX1100-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX1100-NEXT: s_getpc_b64 s[2:3]
; GFX1100-NEXT: s_add_u32 s2, s2, global@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s3, s3, global@rel32@hi+12
; GFX1100-NEXT: s_cmp_eq_u32 s3, s0
; GFX1100-NEXT: s_cmp_eq_u32 s3, s1
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX1100-NEXT: s_mov_b32 s0, -1
@ -1060,9 +1037,6 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX1200-NEXT: s_mov_b32 s2, 32
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX1200-NEXT: s_getpc_b64 s[2:3]
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_sext_i32_i16 s3, s3
@ -1070,7 +1044,7 @@ define double @optnone_atomicrmw_fadd_f64_expand(double %val) #1 {
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_ci_u32 s3, s3, global@rel32@hi+24
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_eq_u32 s3, s0
; GFX1200-NEXT: s_cmp_eq_u32 s3, s1
; GFX1200-NEXT: s_cselect_b32 s0, -1, 0
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0

View File

@ -134,57 +134,57 @@ define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addr
;
; GFX9-LABEL: with_private_to_flat_addrspacecast_cc_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-NEXT: s_load_dword s0, s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_cmp_lg_u32 s2, -1
; GFX9-NEXT: s_cselect_b32 s0, s1, 0
; GFX9-NEXT: s_cselect_b32 s1, s2, 0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: s_cmp_lg_u32 s0, -1
; GFX9-NEXT: s_cselect_b32 s1, s1, 0
; GFX9-NEXT: s_cselect_b32 s0, s0, 0
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: flat_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX9-ARCH-FLAT-LABEL: with_private_to_flat_addrspacecast_cc_kernel:
; GFX9-ARCH-FLAT: ; %bb.0:
; GFX9-ARCH-FLAT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-ARCH-FLAT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 0
; GFX9-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-ARCH-FLAT-NEXT: s_cmp_lg_u32 s2, -1
; GFX9-ARCH-FLAT-NEXT: s_cselect_b32 s0, s1, 0
; GFX9-ARCH-FLAT-NEXT: s_cselect_b32 s1, s2, 0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s1
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s0
; GFX9-ARCH-FLAT-NEXT: s_cmp_lg_u32 s0, -1
; GFX9-ARCH-FLAT-NEXT: s_cselect_b32 s1, s1, 0
; GFX9-ARCH-FLAT-NEXT: s_cselect_b32 s0, s0, 0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX9-ARCH-FLAT-NEXT: flat_store_dword v[0:1], v2
; GFX9-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX9-ARCH-FLAT-NEXT: s_endpgm
;
; GFX942-ARCH-FLAT-LABEL: with_private_to_flat_addrspacecast_cc_kernel:
; GFX942-ARCH-FLAT: ; %bb.0:
; GFX942-ARCH-FLAT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-ARCH-FLAT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 0
; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-ARCH-FLAT-NEXT: s_cmp_lg_u32 s2, -1
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s0, s1, 0
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s1, s2, 0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s1
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s0
; GFX942-ARCH-FLAT-NEXT: s_cmp_lg_u32 s0, -1
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s1, s1, 0
; GFX942-ARCH-FLAT-NEXT: s_cselect_b32 s0, s0, 0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-ARCH-FLAT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX942-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX942-ARCH-FLAT-NEXT: s_endpgm
;
; GFX10-LABEL: with_private_to_flat_addrspacecast_cc_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX10-NEXT: s_load_dword s0, s[8:9], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s2, -1
; GFX10-NEXT: s_cselect_b32 s0, s2, 0
; GFX10-NEXT: s_cmp_lg_u32 s0, -1
; GFX10-NEXT: s_cselect_b32 s0, s0, 0
; GFX10-NEXT: s_cselect_b32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
@ -533,49 +533,49 @@ define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) noca
;
; GFX9-LABEL: private_constant_expression_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX9-ARCH-FLAT-LABEL: private_constant_expression_use:
; GFX9-ARCH-FLAT: ; %bb.0:
; GFX9-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9-ARCH-FLAT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s3
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX9-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-ARCH-FLAT-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-ARCH-FLAT-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX9-ARCH-FLAT-NEXT: s_endpgm
;
; GFX942-ARCH-FLAT-LABEL: private_constant_expression_use:
; GFX942-ARCH-FLAT: ; %bb.0:
; GFX942-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX942-ARCH-FLAT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s3
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-ARCH-FLAT-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX942-ARCH-FLAT-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
; GFX942-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX942-ARCH-FLAT-NEXT: s_endpgm
;
; GFX10-LABEL: private_constant_expression_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) %out, align 8
@ -611,48 +611,48 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr)
;
; GFX9-LABEL: calls_intrin_ascast_cc_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
; GFX9-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9-NEXT: s_load_dword s0, s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v2, 7
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: flat_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX9-ARCH-FLAT-LABEL: calls_intrin_ascast_cc_kernel:
; GFX9-ARCH-FLAT: ; %bb.0:
; GFX9-ARCH-FLAT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9-ARCH-FLAT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 7
; GFX9-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s2
; GFX9-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s0
; GFX9-ARCH-FLAT-NEXT: flat_store_dword v[0:1], v2
; GFX9-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX9-ARCH-FLAT-NEXT: s_endpgm
;
; GFX942-ARCH-FLAT-LABEL: calls_intrin_ascast_cc_kernel:
; GFX942-ARCH-FLAT: ; %bb.0:
; GFX942-ARCH-FLAT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-ARCH-FLAT-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v1, s1
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v2, 7
; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ARCH-FLAT-NEXT: v_mov_b32_e32 v0, s0
; GFX942-ARCH-FLAT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX942-ARCH-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX942-ARCH-FLAT-NEXT: s_endpgm
;
; GFX10-LABEL: calls_intrin_ascast_cc_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX10-NEXT: v_mov_b32_e32 v2, 7
; GFX10-NEXT: s_load_dword s0, s[8:9], 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_mov_b32_e32 v2, 7
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: flat_store_dword v[0:1], v2
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm

View File

@ -226,9 +226,8 @@ define void @private_alloca_to_flat(ptr %ptr) {
; GISEL-ASM-LABEL: private_alloca_to_flat:
; GISEL-ASM: ; %bb.0:
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; GISEL-ASM-NEXT: s_lshr_b32 s4, s32, 6
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], src_private_base
; GISEL-ASM-NEXT: s_mov_b32 s5, s7
; GISEL-ASM-NEXT: v_mov_b32_e32 v0, s4
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
@ -330,21 +329,21 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
; DAGISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_xor_b64 s[6:7], vcc, -1
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; DAGISEL-ASM-NEXT: s_xor_b64 s[8:9], vcc, -1
; DAGISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT: .LBB11_3: ; %finally
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[8:9]
; DAGISEL-ASM-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB11_3
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
;

View File

@ -254,8 +254,8 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -447,8 +447,8 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -846,8 +846,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -900,8 +900,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1070,8 +1070,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1124,8 +1124,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1288,8 +1288,8 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -1479,8 +1479,8 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1535,8 +1535,8 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -2016,8 +2016,8 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -2070,8 +2070,8 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -2579,8 +2579,8 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -2772,8 +2772,8 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3171,8 +3171,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3225,8 +3225,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3395,8 +3395,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3449,8 +3449,8 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3613,8 +3613,8 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3804,8 +3804,8 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3860,8 +3860,8 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4024,8 +4024,8 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4215,8 +4215,8 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4271,8 +4271,8 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -6201,9 +6201,9 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -6369,8 +6369,8 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -6427,8 +6427,8 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -6662,10 +6662,10 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -6831,8 +6831,8 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -6889,8 +6889,8 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -7552,9 +7552,9 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -7715,8 +7715,8 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -7771,8 +7771,8 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -8001,10 +8001,10 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -8165,8 +8165,8 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -8221,8 +8221,8 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]

View File

@ -3255,9 +3255,9 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3397,8 +3397,8 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3438,8 +3438,8 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3667,10 +3667,10 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3810,8 +3810,8 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3851,8 +3851,8 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4465,9 +4465,9 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -4605,8 +4605,8 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4645,8 +4645,8 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v6, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4870,10 +4870,10 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5011,8 +5011,8 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5051,8 +5051,8 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]

View File

@ -3255,9 +3255,9 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3397,8 +3397,8 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3438,8 +3438,8 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3667,10 +3667,10 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3810,8 +3810,8 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3851,8 +3851,8 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4465,9 +4465,9 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -4605,8 +4605,8 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4645,8 +4645,8 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v6, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4870,10 +4870,10 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5011,8 +5011,8 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5051,8 +5051,8 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_max_f64 v[4:5], v[2:3], v[2:3]
; GFX908-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]

View File

@ -3711,9 +3711,9 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3871,8 +3871,8 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3922,8 +3922,8 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4157,10 +4157,10 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -4318,8 +4318,8 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4369,8 +4369,8 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -5019,9 +5019,9 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_mov_b64 s[2:3], 0x7f8
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5175,8 +5175,8 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5225,8 +5225,8 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7f8, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5455,10 +5455,10 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0xf800
; GFX942-NEXT: s_mov_b32 s1, -1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-NEXT: s_movk_i32 s2, 0xf800
; GFX942-NEXT: s_mov_b32 s3, -1
; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5612,8 +5612,8 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5662,8 +5662,8 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]

View File

@ -637,8 +637,8 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -824,11 +824,11 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -995,8 +995,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -1028,9 +1028,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -1150,11 +1149,11 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -1497,8 +1496,8 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -1688,11 +1687,11 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -1869,8 +1868,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -1905,9 +1904,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -2038,11 +2036,11 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -2393,8 +2391,8 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -2586,11 +2584,11 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -2769,8 +2767,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -2807,9 +2805,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -2940,11 +2937,11 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -3299,8 +3296,8 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3492,11 +3489,11 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -3675,8 +3672,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -3712,9 +3709,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -3846,11 +3842,11 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -4203,8 +4199,8 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -4396,11 +4392,11 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -4579,8 +4575,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -4616,9 +4612,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -4750,11 +4745,11 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5107,8 +5102,8 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -5300,11 +5295,11 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -5483,8 +5478,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5520,9 +5515,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5654,11 +5648,11 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -5971,8 +5965,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -6156,11 +6150,11 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -6333,8 +6327,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -6370,9 +6364,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -6496,11 +6489,11 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -6813,8 +6806,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -6998,11 +6991,11 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -7175,8 +7168,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -7212,9 +7205,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -7338,11 +7330,11 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -7655,8 +7647,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -7840,11 +7832,11 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -8017,8 +8009,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -8054,9 +8046,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -8180,11 +8171,11 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -8497,8 +8488,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -8682,11 +8673,11 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -8859,8 +8850,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -8896,9 +8887,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -9022,11 +9012,11 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -9393,8 +9383,8 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -9594,14 +9584,14 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -9788,8 +9778,8 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, v2
@ -9831,9 +9821,8 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v8, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -9969,11 +9958,11 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, v2
@ -10299,8 +10288,8 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -10498,11 +10487,11 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -10683,8 +10672,8 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -10720,9 +10709,8 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -10852,11 +10840,11 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -11180,8 +11168,8 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -11385,11 +11373,11 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
; GFX950-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
@ -11576,8 +11564,8 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -11614,9 +11602,8 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX950-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX950-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -11751,11 +11738,11 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX950-SDAG-NEXT: s_movk_i32 s0, 0xff80
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1]
; GFX950-SDAG-NEXT: s_mov_b32 s1, -1
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX950-SDAG-NEXT: s_movk_i32 s2, 0xff80
; GFX950-SDAG-NEXT: s_mov_b32 s3, -1
; GFX950-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
; GFX950-SDAG-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-SDAG-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@ -11841,8 +11828,8 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x50
; GFX1250-SDAG-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x50
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX1250-SDAG-NEXT: s_cselect_b32 s2, -1, 0
@ -11899,8 +11886,8 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s0, 0x50
; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 1
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
@ -11958,8 +11945,8 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12006,8 +11993,8 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX950-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX950-GISEL-NEXT: s_mov_b32 s2, 1
; GFX950-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
@ -12061,8 +12048,8 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x50
; GFX1250-SDAG-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x50
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX1250-SDAG-NEXT: s_cselect_b32 s2, -1, 0
@ -12121,8 +12108,8 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s0, 0x50
; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 1
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX1250-GISEL-NEXT: s_cbranch_scc0 .LBB111_6
@ -12177,8 +12164,8 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12225,8 +12212,8 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX950-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX950-GISEL-NEXT: s_mov_b32 s2, 1
; GFX950-GISEL-NEXT: s_cbranch_scc0 .LBB111_6
@ -12355,8 +12342,8 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12388,8 +12375,8 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s2, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, s5
; GFX950-GISEL-NEXT: s_mov_b32 s4, 1
; GFX950-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
@ -12506,8 +12493,8 @@ define void @flat_atomic_fmax_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12540,8 +12527,8 @@ define void @flat_atomic_fmax_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s2, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, s5
; GFX950-GISEL-NEXT: s_mov_b32 s4, 1
; GFX950-GISEL-NEXT: s_cbranch_scc0 .LBB113_2
@ -12656,8 +12643,8 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12689,8 +12676,8 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s2, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, s5
; GFX950-GISEL-NEXT: s_mov_b32 s4, 1
; GFX950-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
@ -12807,8 +12794,8 @@ define void @flat_atomic_fmin_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x50
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX950-SDAG-NEXT: s_cmp_eq_u32 s1, s3
; GFX950-SDAG-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX950-SDAG-NEXT: s_andn2_b64 vcc, exec, s[2:3]
@ -12841,8 +12828,8 @@ define void @flat_atomic_fmin_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: s_add_u32 s2, s0, 0x50
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX950-GISEL-NEXT: s_addc_u32 s3, s1, 0
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, s5
; GFX950-GISEL-NEXT: s_mov_b32 s4, 1
; GFX950-GISEL-NEXT: s_cbranch_scc0 .LBB115_2

File diff suppressed because it is too large Load Diff

View File

@ -187,8 +187,8 @@ define void @flat_atomic_xchg_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -422,8 +422,8 @@ define i64 @flat_atomic_xchg_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -660,8 +660,8 @@ define amdgpu_gfx void @flat_atomic_xchg_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -897,8 +897,8 @@ define amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_offset_scalar(ptr inreg %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -1010,8 +1010,8 @@ define void @flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1126,8 +1126,8 @@ define i64 @flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -1347,8 +1347,8 @@ define void @flat_atomic_xchg_f64_noret_offset(ptr %out, double %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -1582,8 +1582,8 @@ define double @flat_atomic_xchg_f64_ret_offset(ptr %out, double %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -1820,8 +1820,8 @@ define amdgpu_gfx void @flat_atomic_xchg_f64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -2057,8 +2057,8 @@ define amdgpu_gfx double @flat_atomic_xchg_f64_ret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -2170,8 +2170,8 @@ define void @flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -2286,8 +2286,8 @@ define double @flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -2537,8 +2537,8 @@ define void @flat_atomic_add_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -2797,8 +2797,8 @@ define i64 @flat_atomic_add_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3063,8 +3063,8 @@ define amdgpu_gfx void @flat_atomic_add_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -3320,8 +3320,8 @@ define amdgpu_gfx i64 @flat_atomic_add_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -3448,8 +3448,8 @@ define void @flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -3578,8 +3578,8 @@ define i64 @flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -3924,8 +3924,8 @@ define void @flat_atomic_sub_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -4284,8 +4284,8 @@ define i64 @flat_atomic_sub_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -4656,8 +4656,8 @@ define amdgpu_gfx void @flat_atomic_sub_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -5017,8 +5017,8 @@ define amdgpu_gfx i64 @flat_atomic_sub_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -5159,8 +5159,8 @@ define void @flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5289,8 +5289,8 @@ define i64 @flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -5635,8 +5635,8 @@ define void @flat_atomic_and_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -5995,8 +5995,8 @@ define i64 @flat_atomic_and_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -6357,8 +6357,8 @@ define amdgpu_gfx void @flat_atomic_and_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -6706,8 +6706,8 @@ define amdgpu_gfx i64 @flat_atomic_and_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -6846,8 +6846,8 @@ define void @flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -6976,8 +6976,8 @@ define i64 @flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -7342,8 +7342,8 @@ define void @flat_atomic_nand_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -7726,8 +7726,8 @@ define i64 @flat_atomic_nand_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -8112,8 +8112,8 @@ define amdgpu_gfx void @flat_atomic_nand_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -8485,8 +8485,8 @@ define amdgpu_gfx i64 @flat_atomic_nand_i64_ret_offset_scalar(ptr inreg %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -8675,8 +8675,8 @@ define void @flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -8871,8 +8871,8 @@ define i64 @flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -9237,8 +9237,8 @@ define void @flat_atomic_or_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -9597,8 +9597,8 @@ define i64 @flat_atomic_or_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -9959,8 +9959,8 @@ define amdgpu_gfx void @flat_atomic_or_i64_noret_offset_scalar(ptr inreg %out, i
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -10308,8 +10308,8 @@ define amdgpu_gfx i64 @flat_atomic_or_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -10448,8 +10448,8 @@ define void @flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -10578,8 +10578,8 @@ define i64 @flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -10924,8 +10924,8 @@ define void @flat_atomic_xor_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -11284,8 +11284,8 @@ define i64 @flat_atomic_xor_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -11646,8 +11646,8 @@ define amdgpu_gfx void @flat_atomic_xor_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -11995,8 +11995,8 @@ define amdgpu_gfx i64 @flat_atomic_xor_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -12135,8 +12135,8 @@ define void @flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -12265,8 +12265,8 @@ define i64 @flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -12611,8 +12611,8 @@ define void @flat_atomic_max_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -12971,8 +12971,8 @@ define i64 @flat_atomic_max_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -13358,8 +13358,8 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -13737,8 +13737,8 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -13939,20 +13939,20 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GCN3: ; %bb.0: ; %entry
; GCN3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN3-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN3-NEXT: s_mov_b32 s14, -1
; GCN3-NEXT: s_mov_b32 s15, 0xe00000
; GCN3-NEXT: s_add_u32 s12, s12, s11
; GCN3-NEXT: s_addc_u32 s13, s13, 0
; GCN3-NEXT: s_waitcnt lgkmcnt(0)
; GCN3-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
; GCN3-NEXT: s_add_u32 s0, s0, s6
; GCN3-NEXT: s_addc_u32 s1, s1, s7
; GCN3-NEXT: s_lshl_b64 s[4:5], s[8:9], 3
; GCN3-NEXT: s_add_u32 s0, s0, s4
; GCN3-NEXT: s_addc_u32 s1, s1, s5
; GCN3-NEXT: s_add_u32 s0, s0, 32
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN3-NEXT: s_addc_u32 s1, s1, 0
; GCN3-NEXT: s_cmp_eq_u32 s1, s5
; GCN3-NEXT: s_cmp_eq_u32 s1, s7
; GCN3-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GCN3-NEXT: s_mov_b64 s[4:5], -1
@ -14368,18 +14368,18 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
; GCN3: ; %bb.0: ; %entry
; GCN3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN3-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN3-NEXT: s_mov_b32 s14, -1
; GCN3-NEXT: s_mov_b32 s15, 0xe00000
; GCN3-NEXT: s_add_u32 s12, s12, s11
; GCN3-NEXT: s_addc_u32 s13, s13, 0
; GCN3-NEXT: s_waitcnt lgkmcnt(0)
; GCN3-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
; GCN3-NEXT: s_add_u32 s0, s0, s6
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: s_addc_u32 s1, s1, s7
; GCN3-NEXT: s_cmp_eq_u32 s1, s5
; GCN3-NEXT: s_lshl_b64 s[4:5], s[8:9], 3
; GCN3-NEXT: s_add_u32 s0, s0, s4
; GCN3-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN3-NEXT: s_addc_u32 s1, s1, s5
; GCN3-NEXT: s_cmp_eq_u32 s1, s7
; GCN3-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GCN3-NEXT: s_mov_b64 s[4:5], -1
@ -14734,8 +14734,8 @@ define void @flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -14864,8 +14864,8 @@ define i64 @flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -15210,8 +15210,8 @@ define void @flat_atomic_umax_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -15570,8 +15570,8 @@ define i64 @flat_atomic_umax_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -15957,8 +15957,8 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -16336,8 +16336,8 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -16538,20 +16538,20 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
; GCN3: ; %bb.0: ; %entry
; GCN3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN3-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN3-NEXT: s_mov_b32 s14, -1
; GCN3-NEXT: s_mov_b32 s15, 0xe00000
; GCN3-NEXT: s_add_u32 s12, s12, s11
; GCN3-NEXT: s_addc_u32 s13, s13, 0
; GCN3-NEXT: s_waitcnt lgkmcnt(0)
; GCN3-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
; GCN3-NEXT: s_add_u32 s0, s0, s6
; GCN3-NEXT: s_addc_u32 s1, s1, s7
; GCN3-NEXT: s_lshl_b64 s[4:5], s[8:9], 3
; GCN3-NEXT: s_add_u32 s0, s0, s4
; GCN3-NEXT: s_addc_u32 s1, s1, s5
; GCN3-NEXT: s_add_u32 s0, s0, 32
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN3-NEXT: s_addc_u32 s1, s1, 0
; GCN3-NEXT: s_cmp_eq_u32 s1, s5
; GCN3-NEXT: s_cmp_eq_u32 s1, s7
; GCN3-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GCN3-NEXT: s_mov_b64 s[4:5], -1
@ -17124,8 +17124,8 @@ define void @flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -17254,8 +17254,8 @@ define i64 @flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -17600,8 +17600,8 @@ define void @flat_atomic_umin_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -17960,8 +17960,8 @@ define i64 @flat_atomic_umin_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -18347,8 +18347,8 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -18726,8 +18726,8 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -18871,8 +18871,8 @@ define void @flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -19001,8 +19001,8 @@ define i64 @flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -19347,8 +19347,8 @@ define void @flat_atomic_min_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -19707,8 +19707,8 @@ define i64 @flat_atomic_min_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -20094,8 +20094,8 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -20473,8 +20473,8 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -20675,20 +20675,20 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GCN3: ; %bb.0: ; %entry
; GCN3-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN3-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN3-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GCN3-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN3-NEXT: s_mov_b32 s14, -1
; GCN3-NEXT: s_mov_b32 s15, 0xe00000
; GCN3-NEXT: s_add_u32 s12, s12, s11
; GCN3-NEXT: s_addc_u32 s13, s13, 0
; GCN3-NEXT: s_waitcnt lgkmcnt(0)
; GCN3-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
; GCN3-NEXT: s_add_u32 s0, s0, s6
; GCN3-NEXT: s_addc_u32 s1, s1, s7
; GCN3-NEXT: s_lshl_b64 s[4:5], s[8:9], 3
; GCN3-NEXT: s_add_u32 s0, s0, s4
; GCN3-NEXT: s_addc_u32 s1, s1, s5
; GCN3-NEXT: s_add_u32 s0, s0, 32
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN3-NEXT: s_addc_u32 s1, s1, 0
; GCN3-NEXT: s_cmp_eq_u32 s1, s5
; GCN3-NEXT: s_cmp_eq_u32 s1, s7
; GCN3-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GCN3-NEXT: s_mov_b64 s[4:5], -1
@ -21101,9 +21101,9 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
; GCN3-NEXT: s_mov_b32 s15, 0xe00000
; GCN3-NEXT: s_add_u32 s12, s12, s11
; GCN3-NEXT: s_addc_u32 s13, s13, 0
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN3-NEXT: s_waitcnt lgkmcnt(0)
; GCN3-NEXT: s_cmp_eq_u32 s1, s5
; GCN3-NEXT: s_cmp_eq_u32 s1, s7
; GCN3-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GCN3-NEXT: s_mov_b64 s[4:5], -1
@ -21457,8 +21457,8 @@ define void @flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -21587,8 +21587,8 @@ define i64 @flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -21958,8 +21958,8 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -22348,8 +22348,8 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -22745,8 +22745,8 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -23130,8 +23130,8 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s34, s4, 32
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_mov_b64 s[36:37], src_private_base
; GCN3-NEXT: s_addc_u32 s35, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s35, s37
; GCN3-NEXT: s_cselect_b64 s[36:37], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[36:37]
@ -23282,8 +23282,8 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -23421,8 +23421,8 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -23810,8 +23810,8 @@ define void @flat_atomic_udec_wrap_i64_noret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
@ -24223,8 +24223,8 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset(ptr %out, i64 %in) {
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
@ -24659,8 +24659,8 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s38, s4, 32
; GCN3-NEXT: s_addc_u32 s39, s5, 0
; GCN3-NEXT: s_mov_b64 s[34:35], src_private_base
; GCN3-NEXT: s_addc_u32 s39, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s39, s35
; GCN3-NEXT: s_cselect_b64 s[34:35], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[34:35]
@ -25091,8 +25091,8 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: s_add_u32 s38, s4, 32
; GCN3-NEXT: s_addc_u32 s39, s5, 0
; GCN3-NEXT: s_mov_b64 s[34:35], src_private_base
; GCN3-NEXT: s_addc_u32 s39, s5, 0
; GCN3-NEXT: s_cmp_eq_u32 s39, s35
; GCN3-NEXT: s_cselect_b64 s[34:35], -1, 0
; GCN3-NEXT: s_andn2_b64 vcc, exec, s[34:35]
@ -25253,8 +25253,8 @@ define void @flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN3-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@ -25397,8 +25397,8 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o
; GCN3: ; %bb.0:
; GCN3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN3-NEXT: v_add_co_u32_e32 v4, vcc, 32, v0
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN3-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GCN3-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GCN3-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN3-NEXT: s_and_saveexec_b64 s[4:5], vcc

View File

@ -6,21 +6,21 @@ define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, ptr addrspace(1) %b
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; CHECK-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
; CHECK-NEXT: s_load_dword s8, s[4:5], 0x24
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; CHECK-NEXT: s_mov_b32 s14, -1
; CHECK-NEXT: s_mov_b32 s15, 0xe00000
; CHECK-NEXT: s_add_u32 s12, s12, s11
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_ashr_i32 s7, s6, 31
; CHECK-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
; CHECK-NEXT: s_add_u32 s0, s0, s6
; CHECK-NEXT: s_addc_u32 s1, s1, s7
; CHECK-NEXT: s_ashr_i32 s9, s8, 31
; CHECK-NEXT: s_lshl_b64 s[4:5], s[8:9], 3
; CHECK-NEXT: s_add_u32 s0, s0, s4
; CHECK-NEXT: s_addc_u32 s1, s1, s5
; CHECK-NEXT: s_add_u32 s0, s0, -8
; CHECK-NEXT: s_mov_b64 s[4:5], src_shared_base
; CHECK-NEXT: s_mov_b64 s[6:7], src_shared_base
; CHECK-NEXT: s_addc_u32 s1, s1, -1
; CHECK-NEXT: s_cmp_eq_u32 s1, s5
; CHECK-NEXT: s_cmp_eq_u32 s1, s7
; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], -1

View File

@ -56,19 +56,19 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: v_mov_b32_e32 v4, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v1, s2
; GFX9V4-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V4-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V4-NEXT: s_cselect_b32 s0, s1, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s4, 0
; GFX9V4-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s0
; GFX9V4-NEXT: s_cselect_b32 s0, s3, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s5, 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, s1
; GFX9V4-NEXT: v_mov_b32_e32 v3, s0
; GFX9V4-NEXT: flat_store_dword v[0:1], v4
@ -80,19 +80,19 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: v_mov_b32_e32 v4, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v1, s2
; GFX9V5-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V5-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V5-NEXT: s_cselect_b32 s0, s1, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s4, 0
; GFX9V5-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s0
; GFX9V5-NEXT: s_cselect_b32 s0, s3, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s5, 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, s1
; GFX9V5-NEXT: v_mov_b32_e32 v3, s0
; GFX9V5-NEXT: flat_store_dword v[0:1], v4
@ -136,10 +136,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V4-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@ -148,10 +148,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V5-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@ -190,10 +190,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@ -202,10 +202,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off

View File

@ -46,12 +46,12 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX9-LABEL: is_private_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_dword v[0:1], v0, off
@ -79,13 +79,12 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX10-LABEL: is_private_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: global_store_dword v[0:1], v0, off
@ -93,14 +92,14 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX11-LABEL: is_private_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
@ -156,10 +155,10 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
;
; GFX9-SDAG-LABEL: is_private_sgpr:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-SDAG-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_cmp_eq_u32 s2, s1
; GFX9-SDAG-NEXT: s_cmp_eq_u32 s0, s1
; GFX9-SDAG-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-SDAG-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GFX9-SDAG-NEXT: s_cbranch_vccnz .LBB1_2
@ -190,10 +189,10 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
;
; GFX9-GISEL-LABEL: is_private_sgpr:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, s1
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_2
; GFX9-GISEL-NEXT: ; %bb.1: ; %bb0
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
@ -204,10 +203,10 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
;
; GFX10-LABEL: is_private_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s1, s3
; GFX10-NEXT: s_cmp_lg_u32 s3, s1
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
@ -218,10 +217,10 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
;
; GFX11-LABEL: is_private_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_cmp_lg_u32 s1, s3
; GFX11-NEXT: s_cmp_lg_u32 s3, s1
; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %bb0
; GFX11-NEXT: v_mov_b32_e32 v0, 0

View File

@ -81,12 +81,12 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX9-LABEL: is_local_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_dword v[0:1], v0, off
@ -94,15 +94,14 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX1250-LABEL: is_local_vgpr:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v0, s[0:1] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b64 v[0:1], v0, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX1250-NEXT: global_store_b32 v[0:1], v0, off
; GFX1250-NEXT: s_endpgm
@ -129,13 +128,12 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX10-LABEL: is_local_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: global_store_dword v[0:1], v0, off
@ -143,14 +141,14 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
;
; GFX11-LABEL: is_local_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
@ -240,10 +238,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX9-SDAG-LABEL: is_local_sgpr:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9-SDAG-NEXT: s_load_dword s0, s[8:9], 0x4
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_cmp_eq_u32 s2, s1
; GFX9-SDAG-NEXT: s_cmp_eq_u32 s0, s1
; GFX9-SDAG-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-SDAG-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GFX9-SDAG-NEXT: s_cbranch_vccnz .LBB1_2
@ -256,10 +254,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX1250-SDAG-LABEL: is_local_sgpr:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x4
; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX1250-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, s1
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, s1
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, -1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
@ -291,10 +289,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX9-GISEL-LABEL: is_local_sgpr:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, s1
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_2
; GFX9-GISEL-NEXT: ; %bb.1: ; %bb0
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
@ -305,10 +303,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX10-LABEL: is_local_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s1, s3
; GFX10-NEXT: s_cmp_lg_u32 s3, s1
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
@ -319,10 +317,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX11-LABEL: is_local_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_cmp_lg_u32 s1, s3
; GFX11-NEXT: s_cmp_lg_u32 s3, s1
; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %bb0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
@ -333,10 +331,10 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
;
; GFX1250-GISEL-LABEL: is_local_sgpr:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX1250-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX1250-GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s1, s3
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s3, s1
; GFX1250-GISEL-NEXT: s_cbranch_scc1 .LBB1_2
; GFX1250-GISEL-NEXT: ; %bb.1: ; %bb0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, 0

View File

@ -10,17 +10,17 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: v_pk_mov_b32 v[46:47], 0, 0
; CHECK-NEXT: flat_load_dword v42, v[46:47]
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[8:9], 0x8
; CHECK-NEXT: s_load_dword s68, s[8:9], 0x0
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_load_dwordx4 s[64:67], s[34:35], 0x8
; CHECK-NEXT: s_load_dword s68, s[34:35], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b64 s[8:9], src_private_base
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s68, -1
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_cselect_b32 s5, s5, 0
; CHECK-NEXT: s_cselect_b32 s5, s9, 0
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_cselect_b32 s6, s68, 0
; CHECK-NEXT: v_mov_b32_e32 v57, s5

View File

@ -14,17 +14,15 @@ define void @_Z12lane_pc_testj() #0 !dbg !9 {
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ; %bb.1: ; %lab
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: .Ltmp0:
; GCN-NEXT: .loc 0 12 1 prologue_end ; t.cpp:12:1
; GCN-NEXT: s_mov_b64 s[4:5], src_private_base
; GCN-NEXT: s_mov_b32 s6, 32
; GCN-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_mov_b32 s5, -1
; GCN-NEXT: s_mov_b64 s[6:7], src_private_base
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: s_lshr_b32 s8, s32, 5
; GCN-NEXT: s_cmp_lg_u32 s8, s5
; GCN-NEXT: s_cselect_b32 s5, s4, s7
; GCN-NEXT: s_cselect_b32 s4, s8, s6
; GCN-NEXT: s_cmp_lg_u32 s8, s6
; GCN-NEXT: s_cselect_b32 s5, s7, s5
; GCN-NEXT: s_cselect_b32 s4, s8, s4
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: .loc 0 13 1 ; t.cpp:13:1
; GCN-NEXT: v_mov_b32_e32 v0, s4