[AMDGPU] Handle AV classes in SIFixSGPRCopies::processPHINode (#169038)

Fix a problem exposed by #166483 using AV classes in more places.
`isVectorRegister` only accepts registers of VGPR or AGPR classes.
`hasVectorRegisters` additionally accepts the combined AV classes.

Fixes: #168761
This commit is contained in:
Jay Foad 2025-11-21 15:17:55 +00:00 committed by GitHub
parent bc323b609b
commit 0b6db777ba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 2472 additions and 1245 deletions

View File

@ -856,8 +856,8 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
}
if (TRI->isVectorRegister(*MRI, PHIRes) ||
RC0 == &AMDGPU::VReg_1RegClass) {
if (TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
RC0 == &AMDGPU::VReg_1RegClass) {
LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
TII->legalizeOperands(MI, MDT);
}

File diff suppressed because it is too large Load Diff

View File

@ -48,16 +48,17 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: .LBB0_1: ; %Flow9
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[24:25]
; CHECK-NEXT: s_cbranch_vccz .LBB0_17
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
; CHECK-NEXT: .LBB0_2: ; %._crit_edge1942.i.i.i3548
; CHECK-NEXT: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
; CHECK-NEXT: ; Child Loop BB0_7 Depth 2
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
; CHECK-NEXT: ; %bb.3: ; %.preheader1868.i.i.i3244
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
; CHECK-NEXT: s_cbranch_vccz .LBB0_12
; CHECK-NEXT: ; %bb.4: ; %.preheader1855.i.i.i3329.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15]
@ -85,49 +86,54 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29]
; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[18:19]
; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_5: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_5: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: .LBB0_6: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329
; CHECK-NEXT: s_cbranch_vccnz .LBB0_13
; CHECK-NEXT: .LBB0_7: ; %.preheader1855.i.i.i3329
; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
; CHECK-NEXT: v_accvgpr_read_b32 v27, a1
; CHECK-NEXT: v_accvgpr_read_b32 v26, a0
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
; CHECK-NEXT: ; %bb.8: ; %.lr.ph2070.i.i.i3291
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; %.preheader1856.preheader.i.i.i3325
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v28
; CHECK-NEXT: s_mov_b64 s[24:25], 0
; CHECK-NEXT: v_accvgpr_write_b32 a1, v29
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: s_branch .LBB0_5
; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11]
; CHECK-NEXT: s_mov_b64 s[22:23], 0
; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11]
; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21]
; CHECK-NEXT: s_branch .LBB0_15
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
; CHECK-NEXT: s_branch .LBB0_15
; CHECK-NEXT: .LBB0_11: ; %loop.exit.guard
; CHECK-NEXT: v_mov_b64_e32 v[24:25], v[30:31]
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_13: ; %loop.exit.guard
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_and_b64 vcc, exec, s[24:25]
; CHECK-NEXT: s_cbranch_vccz .LBB0_13
; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit
; CHECK-NEXT: s_cbranch_vccz .LBB0_15
; CHECK-NEXT: ; %bb.14: ; %._crit_edge2105.i.i.i2330.loopexit
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[26:27]
; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17]
@ -139,24 +145,21 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: s_cselect_b32 s23, s23, 0
; CHECK-NEXT: s_cselect_b32 s22, s22, 0
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_branch .LBB0_14
; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_15: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
; CHECK-NEXT: .LBB0_14: ; %Flow6
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
; CHECK-NEXT: .LBB0_15: ; %Flow6
; CHECK-NEXT: .LBB0_16: ; %Flow6
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
; CHECK-NEXT: ; %bb.16: ; %._crit_edge2105.i.i.i2330
; CHECK-NEXT: ; %bb.17: ; %._crit_edge2105.i.i.i2330
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[24:25], 0
; CHECK-NEXT: global_store_dwordx2 v20, v[20:21], s[12:13]
; CHECK-NEXT: s_branch .LBB0_1
; CHECK-NEXT: .LBB0_17: ; %DummyReturnBlock
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
; CHECK-NEXT: s_endpgm
entry:
br label %._crit_edge1942.i.i.i3548

View File

@ -467,7 +467,6 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@ -489,12 +488,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.40.Flow23:
; GFX90A-NEXT: successors: %bb.38(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
@ -509,7 +508,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.41.bb41:
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
@ -539,17 +538,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.42.Flow24:
; GFX90A-NEXT: successors: %bb.40(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
@ -561,8 +560,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr66_sgpr67, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc
@ -606,7 +605,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.46.bb48:
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
@ -615,6 +614,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i51)
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
@ -646,7 +646,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
@ -655,7 +655,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.48.bb63:
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
@ -669,7 +669,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.50.bb68:
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@ -698,7 +698,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.52.bb80:
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc
@ -712,7 +712,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
@ -727,7 +727,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.54.bb73:
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
@ -759,9 +759,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.56.bb90:
; GFX90A-NEXT: successors: %bb.60(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr66_sgpr67, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr7 = COPY renamable $sgpr21, implicit $exec
; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr7, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3)
@ -773,7 +773,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr7 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = V_LSHRREV_B64_e64 1, $vgpr22_vgpr23, implicit $exec
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr16 = COPY renamable $vgpr22, implicit $exec
; GFX90A-NEXT: S_BRANCH %bb.60
; GFX90A-NEXT: {{ $}}
@ -833,14 +833,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.59.bb85:
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr12_vgpr13, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr3, implicit $exec
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
@ -855,20 +855,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.60.Flow31:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.61.Flow30:
; GFX90A-NEXT: successors: %bb.55(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr52_sgpr53, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.55
; GFX90A-NEXT: {{ $}}

View File

@ -0,0 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s
; Check that the copy from s[2:3] to v[0:1] occurs inside the loop, not after it.
define i64 @test_temporal_divergence(i32 %arg) #0 {
; CHECK-LABEL: test_temporal_divergence:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_u32_e32 v2, 1, v0
; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v2, -1, v2
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; CHECK-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; CHECK-NEXT: s_mov_b64 s[2:3], 1
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %end
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
br label %loop
loop:
%i = phi i64 [ 1, %loop ], [ 0, %entry ]
%count = phi i32 [ %inc, %loop ], [ 0, %entry ]
%inc = add i32 %count, 1
%cond = icmp eq i32 %count, %arg
br i1 %cond, label %end, label %loop
end:
ret i64 %i
}

View File

@ -217,6 +217,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: v_mov_b32_e32 v2, 0
@ -236,7 +237,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB8_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
; GFX942-NEXT: global_load_dwordx4 v[16:19], v16, s[0:1]
; GFX942-NEXT: global_load_dwordx4 v[16:19], v0, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v15, 24, v19
; GFX942-NEXT: v_lshrrev_b32_e32 v14, 16, v19

File diff suppressed because it is too large Load Diff

View File

@ -6,8 +6,8 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
; GFX942-LABEL: matmul_kernel:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_mov_b32 s2, 0
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
; GFX942-NEXT: s_mov_b32 s3, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)

View File

@ -722,8 +722,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v2, vcc
; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x2800, v1
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, 0x7f
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0
; GFX90A-NEXT: s_movk_i32 s2, 0xf000
; GFX90A-NEXT: s_movk_i32 s3, 0x1000
; GFX90A-NEXT: s_movk_i32 s4, 0x2000

View File

@ -113,16 +113,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v57, 6
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v57, 7
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_28
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3
; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[58:59], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29
; GLOBALNESS1-NEXT: s_branch .LBB1_4
; GLOBALNESS1-NEXT: .LBB1_3: ; %bb73.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_2
; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
@ -171,10 +171,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: flat_load_dword v0, v[44:45]
@ -183,7 +181,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_25
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
@ -212,7 +210,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[68:69]
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25
; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2
@ -273,7 +271,11 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_14
; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23
; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr58_vgpr59
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_4
; GLOBALNESS1-NEXT: s_branch .LBB1_29
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
@ -283,25 +285,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_mov_b32 s55, s7
; GLOBALNESS1-NEXT: v_readlane_b32 s9, v57, 11
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow24
; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS1-NEXT: ; %bb.26: ; %bb67.i
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v57, 4
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v57, 5
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb69.i
; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_1
; GLOBALNESS1-NEXT: .LBB1_28: ; %bb73.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_2
; GLOBALNESS1-NEXT: .LBB1_29: ; %loop.exit.guard
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
@ -424,16 +422,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v57, 6
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v57, 7
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_28
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3
; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[58:59], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29
; GLOBALNESS0-NEXT: s_branch .LBB1_4
; GLOBALNESS0-NEXT: .LBB1_3: ; %bb73.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_2
; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
@ -482,10 +480,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: flat_load_dword v0, v[44:45]
@ -494,7 +490,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_25
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
@ -524,7 +520,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[68:69]
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25
; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2
@ -585,7 +581,11 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_14
; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow23
; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr58_vgpr59
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_4
; GLOBALNESS0-NEXT: s_branch .LBB1_29
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: s_mov_b32 s55, s83
@ -593,25 +593,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_readlane_b32 s85, v57, 9
; GLOBALNESS0-NEXT: v_readlane_b32 s8, v57, 10
; GLOBALNESS0-NEXT: v_readlane_b32 s9, v57, 11
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow24
; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS0-NEXT: ; %bb.26: ; %bb67.i
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v57, 4
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v57, 5
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb69.i
; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_1
; GLOBALNESS0-NEXT: .LBB1_28: ; %bb73.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_2
; GLOBALNESS0-NEXT: .LBB1_29: ; %loop.exit.guard
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1