[CodeGen] Do not remove IMPLICIT_DEF unless all uses have undef flag added (#188133)
Do not remove the IMPLICIT_DEF of a physreg unless every use has had an undef flag added. Previously, only the first using instruction had undef flags added before the IMPLICIT_DEF was erased; any later use was left without the flag, which causes a failure in machine instruction verification. Uses spanning multiple instructions are tested in AMDGPU/multi-use-implicit-def.mir and X86/multi-use-implicit-def.mir. Signed-off-by: John Lu <John.Lu@amd.com>
This commit is contained in:
parent
54914a4287
commit
c245d764b8
@ -102,38 +102,63 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
|
||||
}
|
||||
|
||||
// This is a physreg implicit-def.
|
||||
// Look for the first instruction to use or define an alias.
|
||||
MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
|
||||
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
|
||||
bool Found = false;
|
||||
for (++UserMI; UserMI != UserE; ++UserMI) {
|
||||
for (MachineOperand &MO : UserMI->operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
Register UserReg = MO.getReg();
|
||||
if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
|
||||
continue;
|
||||
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
|
||||
Found = true;
|
||||
if (MO.isUse())
|
||||
MO.setIsUndef();
|
||||
}
|
||||
if (Found)
|
||||
break;
|
||||
}
|
||||
|
||||
// If we found the using MI, we can erase the IMPLICIT_DEF.
|
||||
if (Found) {
|
||||
LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
|
||||
MI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
// Using instr wasn't found, it could be in another block.
|
||||
// Leave the physreg IMPLICIT_DEF, but trim any extra operands.
|
||||
// Trim any extra operands.
|
||||
for (unsigned i = MI->getNumOperands() - 1; i; --i)
|
||||
MI->removeOperand(i);
|
||||
LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
|
||||
|
||||
// Try to add undef flag to all uses. If all uses are updated remove
|
||||
// implicit-def.
|
||||
MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
|
||||
MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
|
||||
bool ImplicitDefIsDead = false;
|
||||
bool SearchedWholeBlock = true;
|
||||
constexpr unsigned SearchLimit = 35;
|
||||
unsigned Count = 0;
|
||||
for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
|
||||
if (SearchMI->isDebugInstr())
|
||||
continue;
|
||||
if (++Count > SearchLimit) {
|
||||
SearchedWholeBlock = false;
|
||||
break;
|
||||
}
|
||||
for (MachineOperand &MO : SearchMI->operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
Register SearchReg = MO.getReg();
|
||||
if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg))
|
||||
continue;
|
||||
// SearchMI uses or redefines Reg. Set <undef> flags on all uses.
|
||||
if (MO.isUse()) {
|
||||
if (TRI->isSubRegisterEq(Reg, SearchReg)) {
|
||||
MO.setIsUndef();
|
||||
} else {
|
||||
// Use is larger than Reg. It is not safe to add undef to this use.
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (MO.isDef()) {
|
||||
if (TRI->isSubRegisterEq(SearchReg, Reg)) {
|
||||
ImplicitDefIsDead = true;
|
||||
} else {
|
||||
// Reg is larger than definition. It is not safe to add undef to any
|
||||
// subsequent uses of Reg.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ImplicitDefIsDead) {
|
||||
LLVM_DEBUG(dbgs() << "Physreg redefine: " << *SearchMI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we have added an undef flag to all uses (i.e. we have found a redefining
|
||||
// MI or there are no successors), we can erase the IMPLICIT_DEF.
|
||||
if (ImplicitDefIsDead ||
|
||||
(SearchedWholeBlock && MI->getParent()->succ_empty())) {
|
||||
MI->eraseFromParent();
|
||||
LLVM_DEBUG(dbgs() << "Deleting implicit-def: " << *MI);
|
||||
}
|
||||
}
|
||||
|
||||
bool ProcessImplicitDefsLegacy::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
@ -21,12 +21,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
|
||||
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s55, 0
|
||||
; CHECK-NEXT: s_mov_b32 s17, 0
|
||||
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
|
||||
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
|
||||
; CHECK-NEXT: s_mov_b32 s13, s15
|
||||
; CHECK-NEXT: s_cselect_b32 s17, -1, 0
|
||||
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s17
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_mov_b32 s18, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 0
|
||||
; CHECK-NEXT: s_branch .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: s_mov_b32 s14, s12
|
||||
@ -36,36 +36,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
|
||||
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
|
||||
; CHECK-NEXT: s_branch .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
|
||||
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
|
||||
; CHECK-NEXT: s_mov_b32 s18, 1.0
|
||||
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
|
||||
; CHECK-NEXT: v_cmp_lt_f32_e64 s17, s53, 0
|
||||
; CHECK-NEXT: s_mov_b32 s15, 1.0
|
||||
; CHECK-NEXT: s_mov_b32 s12, 0x7fc00000
|
||||
; CHECK-NEXT: .LBB0_6: ; %Flow
|
||||
; CHECK-NEXT: s_mov_b32 s48, 1.0
|
||||
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
|
||||
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s17
|
||||
; CHECK-NEXT: s_mov_b32 s49, s48
|
||||
; CHECK-NEXT: s_mov_b32 s50, s48
|
||||
; CHECK-NEXT: s_mov_b32 s51, s48
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
|
||||
; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
|
||||
; CHECK-NEXT: s_add_u32 s12, s8, 40
|
||||
; CHECK-NEXT: s_addc_u32 s13, s9, 0
|
||||
; CHECK-NEXT: s_add_u32 s18, s8, 40
|
||||
; CHECK-NEXT: s_addc_u32 s19, s9, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[20:21]
|
||||
; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
|
||||
; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
|
||||
; CHECK-NEXT: v_add_f32_e64 v1, s12, s15
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s14
|
||||
; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_mov_b32 s13, s15
|
||||
; CHECK-NEXT: s_mov_b32 s14, s16
|
||||
; CHECK-NEXT: s_mov_b32 s48, 0
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
|
||||
|
||||
@ -12,11 +12,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc
|
||||
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: renamable $sgpr17 = COPY $sgpr15
|
||||
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg3.kernarg.offset.align.down, align 8, addrspace 4)
|
||||
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
|
||||
; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
|
||||
; GFX90A-NEXT: renamable $sgpr15 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
|
||||
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 0, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 -1
|
||||
@ -40,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16
|
||||
; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2
|
||||
@ -51,7 +52,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.3.Flow17:
|
||||
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc
|
||||
@ -59,7 +60,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.4.bb15:
|
||||
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
|
||||
@ -74,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.5:
|
||||
; GFX90A-NEXT: successors: %bb.6(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
@ -109,7 +110,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.6.Flow20:
|
||||
; GFX90A-NEXT: successors: %bb.7(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr26 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr28 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
@ -122,7 +123,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.7.Flow19:
|
||||
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -130,7 +131,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.8.Flow32:
|
||||
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -139,7 +140,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.9.bb89:
|
||||
; GFX90A-NEXT: successors: %bb.10(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
@ -147,7 +148,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.10.Flow33:
|
||||
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -156,7 +157,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.11.bb84:
|
||||
; GFX90A-NEXT: successors: %bb.12(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
@ -164,7 +165,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.12.Flow34:
|
||||
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -173,7 +174,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.13.bb79:
|
||||
; GFX90A-NEXT: successors: %bb.14(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
|
||||
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
|
||||
@ -181,7 +182,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.14.Flow35:
|
||||
; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -190,14 +191,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.15.bb72:
|
||||
; GFX90A-NEXT: successors: %bb.16(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc
|
||||
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2, target-flags(amdgpu-gotprel32-hi) @f2, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_LOAD_DWORDX2_IMM killed renamable $sgpr12_sgpr13, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
|
||||
; GFX90A-NEXT: dead $sgpr15 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: $sgpr12 = COPY killed renamable $sgpr14
|
||||
; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr15
|
||||
; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr17
|
||||
; GFX90A-NEXT: $sgpr14 = COPY killed renamable $sgpr16
|
||||
; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
|
||||
@ -358,7 +360,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.35.bb20:
|
||||
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
|
||||
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -398,14 +400,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.36.Flow21:
|
||||
; GFX90A-NEXT: successors: %bb.6(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.6
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.37.bb27:
|
||||
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
|
||||
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -437,7 +439,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.38.Flow22:
|
||||
; GFX90A-NEXT: successors: %bb.36(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -458,7 +460,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.39.bb34:
|
||||
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
|
||||
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
@ -489,7 +491,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.40.Flow23:
|
||||
; GFX90A-NEXT: successors: %bb.38(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -509,7 +511,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.41.bb41:
|
||||
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
|
||||
@ -539,7 +541,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.42.Flow24:
|
||||
; GFX90A-NEXT: successors: %bb.40(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -558,7 +560,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.43.bb55:
|
||||
; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
|
||||
@ -570,7 +572,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.44:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
|
||||
@ -590,7 +592,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.45.Flow26:
|
||||
; GFX90A-NEXT: successors: %bb.47(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
|
||||
@ -606,7 +608,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.46.bb48:
|
||||
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
@ -637,7 +639,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.47.Flow25:
|
||||
; GFX90A-NEXT: successors: %bb.42(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr44_sgpr45, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
@ -655,21 +657,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.48.bb63:
|
||||
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.49:
|
||||
; GFX90A-NEXT: successors: %bb.44(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: S_BRANCH %bb.44
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.50.bb68:
|
||||
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
@ -678,7 +680,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.51:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
|
||||
@ -698,16 +700,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.52.bb80:
|
||||
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: dead renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
|
||||
; GFX90A-NEXT: dead renamable $sgpr15 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $vgpr10 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr11, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: S_CBRANCH_SCC0 %bb.59, implicit killed $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.53:
|
||||
; GFX90A-NEXT: successors: %bb.61(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
|
||||
@ -726,7 +728,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.54.bb73:
|
||||
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
|
||||
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
@ -751,14 +753,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.55.Flow29:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr62_sgpr63, implicit-def $scc
|
||||
; GFX90A-NEXT: S_BRANCH %bb.45
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.56.bb90:
|
||||
; GFX90A-NEXT: successors: %bb.60(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
@ -778,7 +780,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.57:
|
||||
; GFX90A-NEXT: successors: %bb.7(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
@ -823,7 +825,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3)
|
||||
@ -832,7 +834,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.59.bb85:
|
||||
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
|
||||
@ -854,14 +856,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.60.Flow31:
|
||||
; GFX90A-NEXT: successors: %bb.61(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
|
||||
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.61.Flow30:
|
||||
; GFX90A-NEXT: successors: %bb.55(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
|
||||
@ -873,7 +875,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.62.bb140:
|
||||
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
|
||||
@ -881,14 +883,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.63.Flow13:
|
||||
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.64.bb159:
|
||||
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr6, implicit $exec
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -897,21 +899,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.65.Flow10:
|
||||
; GFX90A-NEXT: successors: %bb.66(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.66.Flow14:
|
||||
; GFX90A-NEXT: successors: %bb.8(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
|
||||
; GFX90A-NEXT: S_BRANCH %bb.8
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.67.bb161:
|
||||
; GFX90A-NEXT: successors: %bb.65(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr27, killed $vgpr29, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr33, implicit $exec
|
||||
@ -930,7 +932,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.68.bb174:
|
||||
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $agpr0 = COPY killed renamable $vgpr14, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 1, $vgpr32, implicit $exec
|
||||
@ -947,14 +949,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.69.Flow:
|
||||
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
|
||||
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.70.bb186:
|
||||
; GFX90A-NEXT: successors: %bb.71(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
|
||||
@ -983,7 +985,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.71.Flow9:
|
||||
; GFX90A-NEXT: successors: %bb.63(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $vgpr14 = COPY killed renamable $agpr0, implicit $exec
|
||||
@ -991,7 +993,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.72.bb196:
|
||||
; GFX90A-NEXT: successors: %bb.69(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr14, killed $vgpr24, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr22 = V_OR_B32_e32 killed $vgpr2, killed $vgpr22, implicit $exec
|
||||
|
||||
@ -305,6 +305,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
|
||||
; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v0, 2.0
|
||||
; SDAG-NEXT: ; implicit-def: $sgpr15
|
||||
; SDAG-NEXT: s_mov_b32 s14, s16
|
||||
; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
@ -330,6 +331,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
|
||||
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, 2.0
|
||||
; GISEL-NEXT: ; implicit-def: $sgpr15
|
||||
; GISEL-NEXT: s_mov_b32 s14, s16
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
|
||||
@ -5,6 +5,10 @@
|
||||
; A call should be skipped if all lanes are zero, since we don't know
|
||||
; what side effects should be avoided inside the call.
|
||||
define hidden void @func() #1 {
|
||||
; GCN-LABEL: func:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -85,19 +89,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
|
||||
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
|
||||
; SDAG-NEXT: s_and_saveexec_b64 s[18:19], vcc
|
||||
; SDAG-NEXT: s_cbranch_execz .LBB3_2
|
||||
; SDAG-NEXT: ; %bb.1: ; %call
|
||||
; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; SDAG-NEXT: s_mov_b32 s13, s15
|
||||
; SDAG-NEXT: s_getpc_b64 s[18:19]
|
||||
; SDAG-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
|
||||
; SDAG-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
|
||||
; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; SDAG-NEXT: s_mov_b32 s12, s14
|
||||
; SDAG-NEXT: s_mov_b32 s13, s15
|
||||
; SDAG-NEXT: s_mov_b32 s14, s16
|
||||
; SDAG-NEXT: ; implicit-def: $sgpr15
|
||||
; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; SDAG-NEXT: .LBB3_2: ; %end
|
||||
; SDAG-NEXT: s_endpgm
|
||||
@ -111,19 +116,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
|
||||
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
|
||||
; GISEL-NEXT: s_and_saveexec_b64 s[18:19], vcc
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB3_2
|
||||
; GISEL-NEXT: ; %bb.1: ; %call
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
|
||||
; GISEL-NEXT: s_mov_b32 s13, s15
|
||||
; GISEL-NEXT: s_getpc_b64 s[18:19]
|
||||
; GISEL-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
|
||||
; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
|
||||
; GISEL-NEXT: s_mov_b32 s12, s14
|
||||
; GISEL-NEXT: s_mov_b32 s13, s15
|
||||
; GISEL-NEXT: s_mov_b32 s14, s16
|
||||
; GISEL-NEXT: ; implicit-def: $sgpr15
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GISEL-NEXT: .LBB3_2: ; %end
|
||||
; GISEL-NEXT: s_endpgm
|
||||
|
||||
@ -184,21 +184,22 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_bitcmp1_b32 s12, 0
|
||||
; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[12:13]
|
||||
; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[18:19]
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_add_u32 s8, s8, 8
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
|
||||
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GCN-NEXT: s_mov_b32 s12, s14
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_mov_b32 s14, s16
|
||||
; GCN-NEXT: ; implicit-def: $sgpr15
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GCN-NEXT: s_branch .LBB4_3
|
||||
; GCN-NEXT: .LBB4_2:
|
||||
@ -235,21 +236,22 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_bitcmp1_b32 s12, 0
|
||||
; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[12:13]
|
||||
; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, s[18:19]
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_add_u32 s8, s8, 8
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
|
||||
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GCN-NEXT: s_mov_b32 s12, s14
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_mov_b32 s14, s16
|
||||
; GCN-NEXT: ; implicit-def: $sgpr15
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GCN-NEXT: s_branch .LBB5_3
|
||||
; GCN-NEXT: .LBB5_2:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
|
||||
@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
|
||||
@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
|
||||
@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
|
||||
@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
|
||||
@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
|
||||
@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
|
||||
@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
|
||||
@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
|
||||
@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1132-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
|
||||
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
|
||||
@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
|
||||
@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
|
||||
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
|
||||
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -57,18 +57,19 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: s_mov_b64 s[18:19], s[4:5]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX11-NEXT: s_load_b32 s24, s[18:19], 0x24
|
||||
; GFX11-NEXT: s_load_b32 s25, s[18:19], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s17, s15
|
||||
; GFX11-NEXT: s_mov_b32 s12, s13
|
||||
; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
|
||||
; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX11-NEXT: s_mov_b32 s20, 0
|
||||
; GFX11-NEXT: s_mov_b32 s0, -1
|
||||
; GFX11-NEXT: s_mov_b32 s17, exec_lo
|
||||
; GFX11-NEXT: s_mov_b32 s24, exec_lo
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0
|
||||
; GFX11-NEXT: v_mul_lo_u32 v0, s25, v0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
|
||||
; GFX11-NEXT: s_cbranch_execz .LBB2_13
|
||||
@ -76,7 +77,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_load_b128 s[20:23], s[18:19], 0x2c
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_bitcmp1_b32 s21, 0
|
||||
; GFX11-NEXT: s_cselect_b32 s25, -1, 0
|
||||
; GFX11-NEXT: s_cselect_b32 s26, -1, 0
|
||||
; GFX11-NEXT: s_bitcmp0_b32 s21, 0
|
||||
; GFX11-NEXT: s_mov_b32 s21, 0
|
||||
; GFX11-NEXT: s_cbranch_scc0 .LBB2_3
|
||||
@ -86,13 +87,14 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
|
||||
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_mov_b32 s13, s14
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_mov_b32 s26, s14
|
||||
; GFX11-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-NEXT: s_mov_b32 s15, s14
|
||||
; GFX11-NEXT: s_mov_b32 s14, s17
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: s_mov_b32 s14, s26
|
||||
; GFX11-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-NEXT: s_cbranch_execz .LBB2_4
|
||||
; GFX11-NEXT: s_branch .LBB2_12
|
||||
@ -126,11 +128,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_mul_i32 s0, s0, s22
|
||||
; GFX11-NEXT: s_mul_i32 s0, s0, s20
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_or_b32 s0, s24, s0
|
||||
; GFX11-NEXT: s_or_b32 s0, s25, s0
|
||||
; GFX11-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
|
||||
; GFX11-NEXT: s_mov_b32 s0, s1
|
||||
; GFX11-NEXT: global_load_u16 v1, v0, s[22:23]
|
||||
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25
|
||||
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s26
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
|
||||
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
|
||||
@ -151,7 +153,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_and_b32 s1, s1, 1
|
||||
; GFX11-NEXT: s_and_b32 s9, 0xffff, s0
|
||||
; GFX11-NEXT: s_cselect_b32 s9, -1, 0
|
||||
; GFX11-NEXT: s_and_b32 s16, s8, exec_lo
|
||||
; GFX11-NEXT: s_and_b32 s15, s8, exec_lo
|
||||
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s9
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s9, v1
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
||||
@ -181,8 +183,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_and_b32 s20, s2, exec_lo
|
||||
; GFX11-NEXT: s_or_not1_b32 s0, s21, exec_lo
|
||||
; GFX11-NEXT: .LBB2_13: ; %Flow9
|
||||
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17
|
||||
; GFX11-NEXT: s_and_saveexec_b32 s17, s0
|
||||
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s24
|
||||
; GFX11-NEXT: s_and_saveexec_b32 s21, s0
|
||||
; GFX11-NEXT: s_cbranch_execz .LBB2_15
|
||||
; GFX11-NEXT: ; %bb.14: ; %bb43
|
||||
; GFX11-NEXT: s_add_u32 s8, s18, 0x58
|
||||
@ -192,12 +194,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
|
||||
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
|
||||
; GFX11-NEXT: s_mov_b32 s13, s14
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-NEXT: s_mov_b32 s14, s17
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: s_or_b32 s20, s20, exec_lo
|
||||
; GFX11-NEXT: .LBB2_15: ; %Flow14
|
||||
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17
|
||||
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s21
|
||||
; GFX11-NEXT: s_and_saveexec_b32 s0, s20
|
||||
; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock
|
||||
; GFX11-NEXT: ; divergent unreachable
|
||||
|
||||
@ -77,6 +77,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v41, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
@ -109,6 +110,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 12
|
||||
@ -194,6 +196,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v57
|
||||
@ -220,6 +223,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v59, 1, v57
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v59
|
||||
@ -246,6 +250,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v59, 2, v57
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v59
|
||||
@ -272,6 +277,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v57
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v58
|
||||
@ -322,6 +328,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v57
|
||||
@ -356,6 +363,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s4, exec_lo
|
||||
@ -381,6 +389,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
|
||||
; CHECK-NEXT: v_add_co_ci_u32_e64 v0, null, 0, v1, vcc_lo
|
||||
@ -441,6 +450,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v73
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
|
||||
@ -501,6 +511,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: s_branch .LBB0_27
|
||||
; CHECK-NEXT: .LBB0_33:
|
||||
@ -821,6 +832,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mul_lo_u32 v44, v0, 14
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
@ -854,6 +866,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s12, s51
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 12
|
||||
@ -944,6 +957,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s13, s50
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v56
|
||||
|
||||
@ -402,6 +402,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr add
|
||||
; GFX908: ; %bb.0: ; %bb
|
||||
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GFX908-NEXT: v_mov_b32_e32 v32, 0
|
||||
; GFX908-NEXT: ; implicit-def: $agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111_agpr112_agpr113_agpr114_agpr115_agpr116_agpr117_agpr118_agpr119_agpr120_agpr121_agpr122_agpr123_agpr124_agpr125_agpr126_agpr127_agpr128_agpr129_agpr130_agpr131
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; use a[100:131]
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
@ -638,6 +639,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg)
|
||||
; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX908-NEXT: s_mov_b32 s32, 0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v40, 0
|
||||
; GFX908-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX908-NEXT: global_load_dwordx4 v[28:31], v40, s[34:35] offset:112
|
||||
@ -902,6 +904,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(
|
||||
; GFX908-NEXT: s_mov_b64 s[0:1], s[52:53]
|
||||
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX908-NEXT: s_mov_b64 s[2:3], s[54:55]
|
||||
; GFX908-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX908-NEXT: .LBB6_2: ; %bb3
|
||||
|
||||
49
llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
Normal file
49
llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
Normal file
@ -0,0 +1,49 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
|
||||
# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
|
||||
# before removing the IMPLICIT_DEF. -verify-machineinstrs will
|
||||
# fail otherwise.
|
||||
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
name: implicit_def_multiple_use
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: implicit_def_multiple_use
|
||||
; CHECK: $vgpr1_lo16 = COPY undef $vgpr0_hi16
|
||||
; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1_lo16 = COPY $vgpr0_hi16
|
||||
$vgpr1_hi16 = COPY $vgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# IMPLICIT_DEF processing will not search across basic blocks for uses, so the IMPLICIT_DEF must not be deleted.
|
||||
---
|
||||
|
||||
name: implicit_def_cannot_find_all_uses
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: implicit_def_cannot_find_all_uses
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: $vgpr1_lo16 = COPY undef $vgpr0_hi16
|
||||
; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16
|
||||
; CHECK-NEXT: $scc = IMPLICIT_DEF
|
||||
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.0, implicit undef $scc
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
bb.0:
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1_lo16 = COPY $vgpr0_hi16
|
||||
$vgpr1_hi16 = COPY $vgpr0_lo16
|
||||
$scc = IMPLICIT_DEF
|
||||
S_CBRANCH_SCC1 %bb.0, implicit $scc
|
||||
bb.1:
|
||||
...
|
||||
@ -30,6 +30,7 @@ define amdgpu_kernel void @main(i1 %arg, ptr %ptr, ptr addrspace(1) %ptr1, ptr a
|
||||
; GFX950-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX950-NEXT: s_mov_b32 s32, 0
|
||||
; GFX950-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX950-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX950-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX950-NEXT: v_lshl_add_u64 v[10:11], v[0:1], 3, s[38:39]
|
||||
|
||||
@ -27,6 +27,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -108,6 +109,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -178,6 +180,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -241,6 +244,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -362,6 +366,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 17, v0
|
||||
@ -489,6 +494,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX900-NEXT: s_mov_b32 s32, 0
|
||||
; GFX900-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX900-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
@ -599,6 +605,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 17, v0
|
||||
@ -711,6 +718,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: s_mov_b32 s32, 0
|
||||
; GFX90A-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX90A-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
@ -812,6 +820,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 17, v0
|
||||
@ -1036,6 +1045,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1122,6 +1132,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1179,6 +1190,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1236,6 +1248,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1353,6 +1366,7 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1578,6 +1592,7 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -1786,6 +1801,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
@ -1849,6 +1865,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
@ -1908,6 +1925,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
@ -1958,6 +1976,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0
|
||||
@ -2058,6 +2077,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
|
||||
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 s32, 0
|
||||
; GFX8-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -2139,6 +2159,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -2208,6 +2229,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
@ -2275,6 +2297,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: ; implicit-def: $sgpr15
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
|
||||
|
||||
@ -25,6 +25,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12
|
||||
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
|
||||
; MUBUF-NEXT: ; implicit-def: $sgpr15
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
|
||||
; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -61,6 +62,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
|
||||
; FLATSCR-NEXT: ; implicit-def: $sgpr15
|
||||
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, s2
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
@ -89,6 +91,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; MUBUF11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
|
||||
; MUBUF11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
|
||||
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; MUBUF11-NEXT: ; implicit-def: $sgpr15
|
||||
; MUBUF11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF11-NEXT: v_mov_b32_e32 v0, s2
|
||||
; MUBUF11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
@ -116,6 +119,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; FLATSCR11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
|
||||
; FLATSCR11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
|
||||
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; FLATSCR11-NEXT: ; implicit-def: $sgpr15
|
||||
; FLATSCR11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR11-NEXT: v_mov_b32_e32 v0, s2
|
||||
; FLATSCR11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
|
||||
42
llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
Normal file
42
llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
Normal file
@ -0,0 +1,42 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
|
||||
# Ensure processing an IMPLICIT_DEF of a physreg handles subreg definitions
|
||||
# and super-reg uses correctly.
|
||||
|
||||
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
# Must not add undef to use of implicit-def because def is partially redefined.
|
||||
---
|
||||
name: impdef_subreg_def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: impdef_subreg_def
|
||||
; CHECK: $sgpr0_sgpr1 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
|
||||
; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
$sgpr0_sgpr1 = IMPLICIT_DEF
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
$sgpr2_sgpr3 = COPY $sgpr0_sgpr1
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# Must not add undef to use of implicit-def because use is larger than implicit definition.
|
||||
---
|
||||
name: impdef_superreg_use
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: impdef_superreg_use
|
||||
; CHECK: $sgpr0 = IMPLICIT_DEF
|
||||
; CHECK-NEXT: $sgpr1 = S_MOV_B32 0
|
||||
; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = S_MOV_B32 0
|
||||
$sgpr2_sgpr3 = COPY $sgpr0_sgpr1
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off
|
||||
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
|
||||
; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20
|
||||
; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[8:9]
|
||||
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
|
||||
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
|
||||
@ -127,21 +127,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
|
||||
; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
|
||||
; GLOBALNESS1-NEXT: flat_load_dword v56, v[46:47]
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[70:71]
|
||||
@ -241,13 +242,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
|
||||
; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s70, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s71, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s70, s48, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s71, s49, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -257,7 +258,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -266,6 +267,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[58:59], off
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
|
||||
@ -275,7 +277,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_branch .LBB1_14
|
||||
; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23
|
||||
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0
|
||||
; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[48:49], 0x0
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s70, v57, 8
|
||||
; GLOBALNESS1-NEXT: v_readlane_b32 s8, v57, 10
|
||||
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
||||
@ -307,36 +309,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31
|
||||
; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow
|
||||
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33
|
||||
; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
|
||||
;
|
||||
@ -350,7 +354,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off
|
||||
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
|
||||
; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20
|
||||
; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
@ -379,7 +383,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[8:9]
|
||||
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
|
||||
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
|
||||
@ -438,21 +442,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
|
||||
; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
|
||||
; GLOBALNESS0-NEXT: flat_load_dword v56, v[46:47]
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[84:85]
|
||||
@ -553,13 +558,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
|
||||
; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i
|
||||
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s84, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s85, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s84, s48, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s85, s49, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -569,7 +574,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -578,6 +583,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[58:59], off
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
|
||||
@ -617,36 +623,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31
|
||||
; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow
|
||||
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33
|
||||
; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS0-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
|
||||
bb:
|
||||
|
||||
@ -66,6 +66,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[44:45], v[58:59]
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[62:63]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr15
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[46:47] glc
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
||||
@ -150,6 +150,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
|
||||
; CHECK-NEXT: extsh r9, r3
|
||||
; CHECK-NEXT: extsw r6, r28
|
||||
; CHECK-NEXT: li r5, 0
|
||||
; CHECK-NEXT: # implicit-def: $f2
|
||||
; CHECK-NEXT: li r7, 0
|
||||
; CHECK-NEXT: std r30, 104(r1)
|
||||
; CHECK-NEXT: std r29, 96(r1)
|
||||
@ -308,6 +309,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
|
||||
; CHECK-BE-NEXT: extsh r9, r3
|
||||
; CHECK-BE-NEXT: extsw r6, r28
|
||||
; CHECK-BE-NEXT: li r5, 0
|
||||
; CHECK-BE-NEXT: # implicit-def: $f2
|
||||
; CHECK-BE-NEXT: li r7, 0
|
||||
; CHECK-BE-NEXT: std r30, 120(r1)
|
||||
; CHECK-BE-NEXT: std r29, 112(r1)
|
||||
|
||||
@ -24,14 +24,22 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
|
||||
; CHECK-NEXT: addi s0, sp, 32
|
||||
; CHECK-NEXT: .cfi_def_cfa s0, 0
|
||||
; CHECK-NEXT: .cfi_remember_state
|
||||
; CHECK-NEXT: .Ltmp0:
|
||||
; CHECK-NEXT: .Ltmp0: # EH_LABEL
|
||||
; CHECK-NEXT: addi sp, sp, -32
|
||||
; CHECK-NEXT: # implicit-def: $x10
|
||||
; CHECK-NEXT: # implicit-def: $x11
|
||||
; CHECK-NEXT: # implicit-def: $x12
|
||||
; CHECK-NEXT: # implicit-def: $x13
|
||||
; CHECK-NEXT: # implicit-def: $x14
|
||||
; CHECK-NEXT: # implicit-def: $x15
|
||||
; CHECK-NEXT: # implicit-def: $x16
|
||||
; CHECK-NEXT: # implicit-def: $x17
|
||||
; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi
|
||||
; CHECK-NEXT: addi sp, sp, 32
|
||||
; CHECK-NEXT: .Ltmp1:
|
||||
; CHECK-NEXT: .Ltmp1: # EH_LABEL
|
||||
; CHECK-NEXT: # %bb.1: # %try.cont.unreachable
|
||||
; CHECK-NEXT: .LBB0_2: # %lpad
|
||||
; CHECK-NEXT: .Ltmp2:
|
||||
; CHECK-NEXT: .Ltmp2: # EH_LABEL
|
||||
; CHECK-NEXT: sext.w a1, a1
|
||||
; CHECK-NEXT: li a2, 1
|
||||
; CHECK-NEXT: bne a1, a2, .LBB0_4
|
||||
|
||||
@ -13,6 +13,7 @@ define i32 @test(i32 %n) nounwind {
|
||||
; CHECK-NEXT: .LBB0_1: @ %bb
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: bl f
|
||||
; CHECK-NEXT: @ implicit-def: $r1
|
||||
; CHECK-NEXT: bl g
|
||||
; CHECK-NEXT: subs r4, #1
|
||||
; CHECK-NEXT: bne .LBB0_1
|
||||
@ -58,6 +59,7 @@ define i32 @test_dead_cycle(i32 %n) nounwind {
|
||||
; CHECK-NEXT: @ %bb.2: @ %bb1
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
|
||||
; CHECK-NEXT: bl f
|
||||
; CHECK-NEXT: @ implicit-def: $r1
|
||||
; CHECK-NEXT: bl g
|
||||
; CHECK-NEXT: .LBB1_3: @ %bb2
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
|
||||
|
||||
@ -26,6 +26,7 @@ define dso_local void @vga_load_state() #0 {
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB0_4: # %for.cond1
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $edx
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: movq vga_load_state_p(%rip), %rax
|
||||
|
||||
@ -24,6 +24,7 @@ define void @PR24199(i32 %a0) {
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: .LBB0_3: # %if.end
|
||||
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: # implicit-def: $rdi
|
||||
; CHECK-NEXT: callq foo@PLT
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
|
||||
|
||||
20
llvm/test/CodeGen/X86/multi-use-implicit-def.mir
Normal file
20
llvm/test/CodeGen/X86/multi-use-implicit-def.mir
Normal file
@ -0,0 +1,20 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
|
||||
# before removing the IMPLICIT_DEF. -verify-machineinstrs will
|
||||
# fail otherwise.
|
||||
|
||||
# RUN: llc -mtriple=x86_64-- -x mir -stop-after processimpdefs < %s -verify-machineinstrs | FileCheck %s
|
||||
---
|
||||
name: implicit_def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: implicit_def
|
||||
; CHECK: MOV32mr $rip, 1, $noreg, 12, $noreg, undef $eax
|
||||
; CHECK-NEXT: RET 0, undef $eax
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: .1.entry:
|
||||
$eax = IMPLICIT_DEF
|
||||
MOV32mr $rip, 1, $noreg, 12, $noreg, $eax
|
||||
RET 0, $eax
|
||||
...
|
||||
@ -67,10 +67,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: cmpq %rax, %rcx
|
||||
; CHECK-NEXT: jae LBB0_8
|
||||
; CHECK-NEXT: ## %bb.7: ## %for.body.lr.ph
|
||||
; CHECK-NEXT: movq %rdi, %r14
|
||||
; CHECK-NEXT: ## implicit-def: $rdi
|
||||
; CHECK-NEXT: movq %rdx, %rbx
|
||||
; CHECK-NEXT: movl $512, %edx ## imm = 0x200
|
||||
; CHECK-NEXT: movl $32, %esi
|
||||
; CHECK-NEXT: movq %rdi, %r14
|
||||
; CHECK-NEXT: callq _memset
|
||||
; CHECK-NEXT: movq %r14, %rdi
|
||||
; CHECK-NEXT: movq %rbx, %rdx
|
||||
@ -156,6 +157,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: jne LBB0_31
|
||||
; CHECK-NEXT: ## %bb.30: ## %lor.rhs500
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
|
||||
; CHECK-NEXT: ## implicit-def: $edi
|
||||
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
|
||||
; CHECK-NEXT: callq ___maskrune
|
||||
; CHECK-NEXT: movb $1, %sil
|
||||
@ -258,6 +260,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
|
||||
; CHECK-NEXT: LBB0_47: ## %if.then1477
|
||||
; CHECK-NEXT: ## implicit-def: $edi
|
||||
; CHECK-NEXT: ## implicit-def: $rsi
|
||||
; CHECK-NEXT: movl $1, %edx
|
||||
; CHECK-NEXT: callq _write
|
||||
; CHECK-NEXT: subq %rbx, %r14
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
; The new code choses %ebp as the split candidate as it has lower spill cost.
|
||||
|
||||
; Make sure the split behaves as expected
|
||||
; CHECK: RS_Split Cascade 1
|
||||
; CHECK: RS_Split Cascade 0
|
||||
; CHECK-NOT: $eax static =
|
||||
; CHECK: $eax no positive bundles
|
||||
; CHECK-NEXT: $ecx no positive bundles
|
||||
|
||||
@ -8,6 +8,7 @@ define void @quux() gc "statepoint-example" {
|
||||
; CHECK: # %bb.0: # %bb1
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: # implicit-def: $rdi
|
||||
; CHECK-NEXT: movl $4, %esi
|
||||
; CHECK-NEXT: callq wombat@PLT
|
||||
; CHECK-NEXT: .Ltmp0:
|
||||
|
||||
@ -183,6 +183,11 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
|
||||
; CHECK-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
|
||||
; CHECK-NEXT: EH_LABEL <mcsymbol >
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: dead $edi = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead $rsi = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead $edx = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead $ecx = IMPLICIT_DEF
|
||||
; CHECK-NEXT: dead $r8d = IMPLICIT_DEF
|
||||
; CHECK-NEXT: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s64) on %stack.0)
|
||||
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
|
||||
; CHECK-NEXT: EH_LABEL <mcsymbol >
|
||||
|
||||
@ -109,6 +109,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne .LBB1_12
|
||||
; CHECK-NEXT: # %bb.2: # %if.end50
|
||||
; CHECK-NEXT: # implicit-def: $rsi
|
||||
; CHECK-NEXT: movq %r14, %rdi
|
||||
; CHECK-NEXT: movq %r15, %rdx
|
||||
; CHECK-NEXT: callq memcpy@PLT
|
||||
@ -170,6 +171,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
|
||||
; CHECK-NEXT: .LBB1_11: # %if.then99.i
|
||||
; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi
|
||||
; CHECK-NEXT: xorl %ebx, %ebx
|
||||
; CHECK-NEXT: # implicit-def: $esi
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: callq cli_dbgmsg@PLT
|
||||
; CHECK-NEXT: .LBB1_12: # %cleanup
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user