[AMDGPU] Handle AV classes in SIFixSGPRCopies::processPHINode (#169038)
Fix a problem exposed by #166483's use of AV classes in more places. `isVectorRegister` only accepts registers of the VGPR or AGPR classes, whereas `hasVectorRegisters` additionally accepts the combined AV classes. Fixes: #168761
This commit is contained in:
parent
bc323b609b
commit
0b6db777ba
@ -856,8 +856,8 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
|
||||
}
|
||||
}
|
||||
|
||||
if (TRI->isVectorRegister(*MRI, PHIRes) ||
|
||||
RC0 == &AMDGPU::VReg_1RegClass) {
|
||||
if (TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
|
||||
RC0 == &AMDGPU::VReg_1RegClass) {
|
||||
LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
|
||||
TII->legalizeOperands(MI, MDT);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -48,16 +48,17 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
|
||||
; CHECK-NEXT: .LBB0_1: ; %Flow9
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[24:25]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_17
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
|
||||
; CHECK-NEXT: .LBB0_2: ; %._crit_edge1942.i.i.i3548
|
||||
; CHECK-NEXT: ; =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
|
||||
; CHECK-NEXT: ; Child Loop BB0_7 Depth 2
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
|
||||
; CHECK-NEXT: ; %bb.3: ; %.preheader1868.i.i.i3244
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_12
|
||||
; CHECK-NEXT: ; %bb.4: ; %.preheader1855.i.i.i3329.preheader
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15]
|
||||
@ -85,49 +86,54 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
|
||||
; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29]
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[18:19]
|
||||
; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
|
||||
; CHECK-NEXT: s_branch .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_5: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
|
||||
; CHECK-NEXT: s_branch .LBB0_7
|
||||
; CHECK-NEXT: .LBB0_5: ; in Loop: Header=BB0_7 Depth=2
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], -1
|
||||
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
||||
; CHECK-NEXT: .LBB0_6: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
|
||||
; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_13
|
||||
; CHECK-NEXT: .LBB0_7: ; %.preheader1855.i.i.i3329
|
||||
; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1
|
||||
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v27, a1
|
||||
; CHECK-NEXT: v_accvgpr_read_b32 v26, a0
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], -1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
||||
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
|
||||
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
|
||||
; CHECK-NEXT: ; %bb.8: ; %.lr.ph2070.i.i.i3291
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
|
||||
; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
|
||||
; CHECK-NEXT: ; %bb.9: ; %.preheader1856.preheader.i.i.i3325
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v28
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], 0
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v29
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
||||
; CHECK-NEXT: s_branch .LBB0_5
|
||||
; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_branch .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_7 Depth=2
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], -1
|
||||
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
|
||||
; CHECK-NEXT: s_branch .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[22:23], 0
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21]
|
||||
; CHECK-NEXT: s_branch .LBB0_15
|
||||
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_branch .LBB0_16
|
||||
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
|
||||
; CHECK-NEXT: s_branch .LBB0_15
|
||||
; CHECK-NEXT: .LBB0_11: ; %loop.exit.guard
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[24:25], v[30:31]
|
||||
; CHECK-NEXT: s_branch .LBB0_16
|
||||
; CHECK-NEXT: .LBB0_13: ; %loop.exit.guard
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[24:25]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_13
|
||||
; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_15
|
||||
; CHECK-NEXT: ; %bb.14: ; %._crit_edge2105.i.i.i2330.loopexit
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[26:27]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17]
|
||||
@ -139,24 +145,21 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
|
||||
; CHECK-NEXT: s_cselect_b32 s23, s23, 0
|
||||
; CHECK-NEXT: s_cselect_b32 s22, s22, 0
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
||||
; CHECK-NEXT: s_branch .LBB0_14
|
||||
; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_branch .LBB0_16
|
||||
; CHECK-NEXT: .LBB0_15: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
|
||||
; CHECK-NEXT: .LBB0_14: ; %Flow6
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
|
||||
; CHECK-NEXT: .LBB0_15: ; %Flow6
|
||||
; CHECK-NEXT: .LBB0_16: ; %Flow6
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], -1
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
|
||||
; CHECK-NEXT: ; %bb.16: ; %._crit_edge2105.i.i.i2330
|
||||
; CHECK-NEXT: ; %bb.17: ; %._crit_edge2105.i.i.i2330
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[24:25], 0
|
||||
; CHECK-NEXT: global_store_dwordx2 v20, v[20:21], s[12:13]
|
||||
; CHECK-NEXT: s_branch .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_17: ; %DummyReturnBlock
|
||||
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
br label %._crit_edge1942.i.i.i3548
|
||||
|
||||
@ -467,7 +467,6 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
|
||||
@ -489,12 +488,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.40.Flow23:
|
||||
; GFX90A-NEXT: successors: %bb.38(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
|
||||
@ -509,7 +508,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.41.bb41:
|
||||
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
|
||||
@ -539,17 +538,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.42.Flow24:
|
||||
; GFX90A-NEXT: successors: %bb.40(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
|
||||
@ -561,8 +560,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr66_sgpr67, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc
|
||||
@ -606,7 +605,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.46.bb48:
|
||||
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
@ -615,6 +614,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i51)
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
|
||||
@ -646,7 +646,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
|
||||
@ -655,7 +655,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.48.bb63:
|
||||
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
|
||||
@ -669,7 +669,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.50.bb68:
|
||||
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
@ -698,7 +698,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.52.bb80:
|
||||
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc
|
||||
@ -712,7 +712,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
@ -727,7 +727,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.54.bb73:
|
||||
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
|
||||
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
@ -759,9 +759,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.56.bb90:
|
||||
; GFX90A-NEXT: successors: %bb.60(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr66_sgpr67, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr7 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr7, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3)
|
||||
@ -773,7 +773,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr7 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = V_LSHRREV_B64_e64 1, $vgpr22_vgpr23, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $vgpr16 = COPY renamable $vgpr22, implicit $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.60
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
@ -833,14 +833,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.59.bb85:
|
||||
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr12_vgpr13, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr3, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr7 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $vgpr22 = IMPLICIT_DEF
|
||||
@ -855,20 +855,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.60.Flow31:
|
||||
; GFX90A-NEXT: successors: %bb.61(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.61.Flow30:
|
||||
; GFX90A-NEXT: successors: %bb.55(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr52_sgpr53, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.55
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
|
||||
@ -0,0 +1,37 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s
|
||||
|
||||
; Check that the copy from s[2:3] to v[0:1] occurs inside the loop, not after it.
|
||||
|
||||
define i64 @test_temporal_divergence(i32 %arg) #0 {
|
||||
; CHECK-LABEL: test_temporal_divergence:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, 1, v0
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], 0
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], 0
|
||||
; CHECK-NEXT: .LBB0_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_add_u32_e32 v2, -1, v2
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
|
||||
; CHECK-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], 1
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %end
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ 1, %loop ], [ 0, %entry ]
|
||||
%count = phi i32 [ %inc, %loop ], [ 0, %entry ]
|
||||
%inc = add i32 %count, 1
|
||||
%cond = icmp eq i32 %count, %arg
|
||||
br i1 %cond, label %end, label %loop
|
||||
|
||||
end:
|
||||
ret i64 %i
|
||||
}
|
||||
@ -217,6 +217,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v16, 0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -236,7 +237,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB8_2
|
||||
; GFX942-NEXT: ; %bb.1: ; %cond.load
|
||||
; GFX942-NEXT: global_load_dwordx4 v[16:19], v16, s[0:1]
|
||||
; GFX942-NEXT: global_load_dwordx4 v[16:19], v0, s[0:1]
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v15, 24, v19
|
||||
; GFX942-NEXT: v_lshrrev_b32_e32 v14, 16, v19
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -6,8 +6,8 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
|
||||
; GFX942-LABEL: matmul_kernel:
|
||||
; GFX942: ; %bb.0: ; %entry
|
||||
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: s_mov_b32 s2, 0
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
|
||||
; GFX942-NEXT: s_mov_b32 s3, 0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
|
||||
@ -722,8 +722,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v2, vcc
|
||||
; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x2800, v1
|
||||
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v1, 0x7f
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0
|
||||
; GFX90A-NEXT: s_movk_i32 s2, 0xf000
|
||||
; GFX90A-NEXT: s_movk_i32 s3, 0x1000
|
||||
; GFX90A-NEXT: s_movk_i32 s4, 0x2000
|
||||
|
||||
@ -113,16 +113,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v57, 6
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v57, 7
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_28
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3
|
||||
; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
|
||||
; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[58:59], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_4
|
||||
; GLOBALNESS1-NEXT: .LBB1_3: ; %bb73.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_2
|
||||
; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5
|
||||
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
|
||||
@ -171,10 +171,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5]
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
|
||||
; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: flat_load_dword v0, v[44:45]
|
||||
@ -183,7 +181,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000
|
||||
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_25
|
||||
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
|
||||
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
|
||||
@ -212,7 +210,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[68:69]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25
|
||||
; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i
|
||||
; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
@ -273,7 +271,11 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_14
|
||||
; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23
|
||||
; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr58_vgpr59
|
||||
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_29
|
||||
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0
|
||||
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
||||
@ -283,25 +285,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s55, s7
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s9, v57, 11
|
||||
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow24
|
||||
; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53]
|
||||
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GLOBALNESS1-NEXT: ; %bb.26: ; %bb67.i
|
||||
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v57, 4
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v57, 5
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1
|
||||
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb69.i
|
||||
; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_1
|
||||
; GLOBALNESS1-NEXT: .LBB1_28: ; %bb73.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_2
|
||||
; GLOBALNESS1-NEXT: .LBB1_29: ; %loop.exit.guard
|
||||
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[8:9]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
|
||||
@ -424,16 +422,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v57, 6
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v57, 7
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_28
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3
|
||||
; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
|
||||
; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[58:59], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_4
|
||||
; GLOBALNESS0-NEXT: .LBB1_3: ; %bb73.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_2
|
||||
; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5
|
||||
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
|
||||
@ -482,10 +480,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
|
||||
; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: flat_load_dword v0, v[44:45]
|
||||
@ -494,7 +490,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000
|
||||
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_25
|
||||
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
|
||||
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
|
||||
@ -524,7 +520,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[68:69]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25
|
||||
; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i
|
||||
; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
@ -585,7 +581,11 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_14
|
||||
; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow23
|
||||
; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr58_vgpr59
|
||||
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_29
|
||||
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s55, s83
|
||||
@ -593,25 +593,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s85, v57, 9
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s8, v57, 10
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s9, v57, 11
|
||||
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow24
|
||||
; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53]
|
||||
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GLOBALNESS0-NEXT: ; %bb.26: ; %bb67.i
|
||||
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v57, 4
|
||||
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v57, 5
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1
|
||||
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb69.i
|
||||
; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_1
|
||||
; GLOBALNESS0-NEXT: .LBB1_28: ; %bb73.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
||||
; GLOBALNESS0-NEXT: s_branch .LBB1_2
|
||||
; GLOBALNESS0-NEXT: .LBB1_29: ; %loop.exit.guard
|
||||
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[8:9]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user