From c245d764b8bd70ff78044f56b2dea619b0d428dc Mon Sep 17 00:00:00 2001 From: LU-JOHN Date: Wed, 1 Apr 2026 10:11:42 -0500 Subject: [PATCH] [CodeGen] Do not remove IMPLICIT_DEF unless all uses have undef flag added (#188133) Do not remove IMPLICIT_DEF of a physreg unless all uses have an undef flag added. Previously, only the first use instruction had undef flags added. This will cause a failure in machine instruction verification. Multi-instruction uses tested in AMDGPU/multi-use-implicit-def.mir and X86/multi-use-implicit-def.mir. --------- Signed-off-by: John Lu --- llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 85 ++++-- ...der-no-live-segment-at-def-implicit-def.ll | 26 +- .../branch-folding-implicit-def-subreg.ll | 112 ++++---- llvm/test/CodeGen/AMDGPU/call-constexpr.ll | 2 + llvm/test/CodeGen/AMDGPU/call-skip.ll | 14 +- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 14 +- .../AMDGPU/global_atomics_scan_fadd.ll | 260 ++++++++++++++---- .../AMDGPU/global_atomics_scan_fmax.ll | 156 ++++++++--- .../AMDGPU/global_atomics_scan_fmin.ll | 156 ++++++++--- .../AMDGPU/global_atomics_scan_fsub.ll | 260 ++++++++++++++---- .../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 33 ++- ...ne-sink-temporal-divergence-swdev407790.ll | 14 + llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll | 3 + .../CodeGen/AMDGPU/multi-use-implicit-def.mir | 49 ++++ llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll | 1 + .../AMDGPU/promote-constOffset-to-imm.ll | 23 ++ ...tack-pointer-offset-relative-frameindex.ll | 4 + .../CodeGen/AMDGPU/subreg-implicit-def.mir | 42 +++ .../AMDGPU/tuple-allocation-failure.ll | 70 ++--- .../AMDGPU/undef-handling-crash-in-ra.ll | 1 + llvm/test/CodeGen/PowerPC/p10-spill-crun.ll | 2 + llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll | 14 +- .../CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 2 + llvm/test/CodeGen/X86/issue76416.ll | 1 + .../X86/machine-trace-metrics-crash.ll | 1 + .../CodeGen/X86/multi-use-implicit-def.mir | 20 ++ llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 6 +- 
.../X86/regalloc-advanced-split-cost.ll | 2 +- .../CodeGen/X86/statepoint-two-results.ll | 1 + .../CodeGen/X86/statepoint-vreg-invoke.ll | 5 + .../X86/tail-dup-merge-loop-headers.ll | 2 + 31 files changed, 1030 insertions(+), 351 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir create mode 100644 llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir create mode 100644 llvm/test/CodeGen/X86/multi-use-implicit-def.mir diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 6b58b6574f5b..5fc765d90f6e 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -102,38 +102,63 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { } // This is a physreg implicit-def. - // Look for the first instruction to use or define an alias. - MachineBasicBlock::instr_iterator UserMI = MI->getIterator(); - MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); - bool Found = false; - for (++UserMI; UserMI != UserE; ++UserMI) { - for (MachineOperand &MO : UserMI->operands()) { - if (!MO.isReg()) - continue; - Register UserReg = MO.getReg(); - if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg)) - continue; - // UserMI uses or redefines Reg. Set flags on all uses. - Found = true; - if (MO.isUse()) - MO.setIsUndef(); - } - if (Found) - break; - } - - // If we found the using MI, we can erase the IMPLICIT_DEF. - if (Found) { - LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI); - MI->eraseFromParent(); - return; - } - - // Using instr wasn't found, it could be in another block. - // Leave the physreg IMPLICIT_DEF, but trim any extra operands. + // Trim any extra operands. for (unsigned i = MI->getNumOperands() - 1; i; --i) MI->removeOperand(i); - LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI); + + // Try to add undef flag to all uses. If all uses are updated remove + // implicit-def. 
+ MachineBasicBlock::instr_iterator SearchMI = MI->getIterator(); + MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end(); + bool ImplicitDefIsDead = false; + bool SearchedWholeBlock = true; + constexpr unsigned SearchLimit = 35; + unsigned Count = 0; + for (++SearchMI; SearchMI != SearchE; ++SearchMI) { + if (SearchMI->isDebugInstr()) + continue; + if (++Count > SearchLimit) { + SearchedWholeBlock = false; + break; + } + for (MachineOperand &MO : SearchMI->operands()) { + if (!MO.isReg()) + continue; + Register SearchReg = MO.getReg(); + if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg)) + continue; + // SearchMI uses or redefines Reg. Set flags on all uses. + if (MO.isUse()) { + if (TRI->isSubRegisterEq(Reg, SearchReg)) { + MO.setIsUndef(); + } else { + // Use is larger than Reg. It is not safe to add undef to this use. + return; + } + } + if (MO.isDef()) { + if (TRI->isSubRegisterEq(SearchReg, Reg)) { + ImplicitDefIsDead = true; + } else { + // Reg is larger than definition. It is not safe to add undef to any + // subsequent uses of Reg. + return; + } + } + } + if (ImplicitDefIsDead) { + LLVM_DEBUG(dbgs() << "Physreg redefine: " << *SearchMI); + break; + } + } + + // If we have added an undef flag to all uses (i.e. we have found a redefining + // MI or there are no successors), we can erase the IMPLICIT_DEF. 
+ if (ImplicitDefIsDead || + (SearchedWholeBlock && MI->getParent()->succ_empty())) { + LLVM_DEBUG(dbgs() << "Deleting implicit-def: " << *MI); + MI->eraseFromParent(); // Deletes MI; it must not be touched afterwards. + } } bool ProcessImplicitDefsLegacy::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll index 3bab9d0e8054..554d7eb2cfbd 100644 --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -21,12 +21,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_cbranch_scc1 .LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i ; CHECK-NEXT: s_cmp_lg_u32 s55, 0 -; CHECK-NEXT: s_mov_b32 s17, 0 -; CHECK-NEXT: s_cselect_b32 s12, -1, 0 -; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 +; CHECK-NEXT: s_mov_b32 s13, s15 +; CHECK-NEXT: s_cselect_b32 s17, -1, 0 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s17 ; CHECK-NEXT: s_cbranch_vccz .LBB0_5 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: s_mov_b32 s18, 0 +; CHECK-NEXT: s_mov_b32 s15, 0 ; CHECK-NEXT: s_branch .LBB0_6 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: s_mov_b32 s14, s12 @@ -36,36 +36,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13] ; CHECK-NEXT: s_branch .LBB0_8 ; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i -; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0 -; CHECK-NEXT: s_mov_b32 s18, 1.0 -; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 +; CHECK-NEXT: v_cmp_lt_f32_e64 s17, s53, 0 +; CHECK-NEXT: s_mov_b32 s15, 1.0 +; CHECK-NEXT: s_mov_b32 s12, 0x7fc00000 ; CHECK-NEXT: .LBB0_6: ; %Flow ; CHECK-NEXT: s_mov_b32 s48, 1.0 -; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 +; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s17 ; CHECK-NEXT: s_mov_b32 s49, s48 ; CHECK-NEXT: s_mov_b32 s50, s48 ; CHECK-NEXT: s_mov_b32 s51, s48 ; CHECK-NEXT: 
s_cbranch_vccnz .LBB0_8 ; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i -; CHECK-NEXT: s_add_u32 s12, s8, 40 -; CHECK-NEXT: s_addc_u32 s13, s9, 0 +; CHECK-NEXT: s_add_u32 s18, s8, 40 +; CHECK-NEXT: s_addc_u32 s19, s9, 0 ; CHECK-NEXT: s_getpc_b64 s[20:21] ; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0 ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1 -; CHECK-NEXT: v_add_f32_e64 v1, s17, s18 +; CHECK-NEXT: v_add_f32_e64 v1, s12, s15 ; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] -; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] +; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19] ; CHECK-NEXT: s_mov_b32 s12, s14 ; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2 ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s13, s15 ; CHECK-NEXT: s_mov_b32 s14, s16 ; CHECK-NEXT: s_mov_b32 s48, 0 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] ; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35] diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index e0b83eeaa0fa..bc88acf521bd 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -12,11 +12,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: renamable $sgpr17 = COPY $sgpr15 ; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, 
implicit $exec ; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg3.kernarg.offset.align.down, align 8, addrspace 4) ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4) - ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4) + ; GFX90A-NEXT: renamable $sgpr15 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4) ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 0, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 -1 @@ -40,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, 
$sgpr26_sgpr27, $vgpr4, $vgpr5 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16 ; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2 @@ -51,7 +52,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3.Flow17: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc @@ -59,7 +60,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.bb15: ; GFX90A-NEXT: successors: %bb.35(0x40000000), 
%bb.5(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec @@ -74,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.5: ; GFX90A-NEXT: successors: %bb.6(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 @@ -109,7 +110,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6.Flow20: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, 
$vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr26 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr28 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec @@ -122,7 +123,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.7.Flow19: ; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, 
$sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, 
$vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0 ; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec @@ -130,7 +131,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.8.Flow32: ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, 
$vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec @@ -139,7 +140,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.9.bb89: ; GFX90A-NEXT: successors: %bb.10(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, 
$vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -147,7 +148,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.10.Flow33: ; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, 
$sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec @@ -156,7 +157,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.11.bb84: ; GFX90A-NEXT: successors: %bb.12(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, 
$sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -164,7 +165,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.12.Flow34: ; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, 
$sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec @@ -173,7 +174,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.13.bb79: ; GFX90A-NEXT: successors: %bb.14(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, 
$vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -181,7 +182,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.14.Flow35: ; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, 
$vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec @@ -190,14 +191,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.15.bb72: ; GFX90A-NEXT: successors: %bb.16(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr8 = 
S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2, target-flags(amdgpu-gotprel32-hi) @f2, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_LOAD_DWORDX2_IMM killed renamable $sgpr12_sgpr13, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) + ; GFX90A-NEXT: dead $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr12 = COPY killed renamable $sgpr14 - ; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr15 + ; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr17 ; GFX90A-NEXT: $sgpr14 = COPY killed renamable $sgpr16 ; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc @@ -358,7 +360,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.35.bb20: ; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr40_sgpr41 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23) ; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec @@ -398,14 +400,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.36.Flow21: ; GFX90A-NEXT: successors: %bb.6(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, 
$vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.6 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.37.bb27: ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, 
$vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30) ; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec @@ -437,7 +439,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.38.Flow22: ; GFX90A-NEXT: successors: %bb.36(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, 
$vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -458,7 +460,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.39.bb34: ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, 
$sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37) ; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec @@ -489,7 +491,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.40.Flow23: ; GFX90A-NEXT: successors: %bb.38(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, 
$vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -509,7 +511,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; 
GFX90A-NEXT: bb.41.bb41: ; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 
0, implicit $exec @@ -539,7 +541,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.42.Flow24: ; GFX90A-NEXT: successors: %bb.40(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, 
$sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -558,7 +560,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.43.bb55: ; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, 
$vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc @@ -570,7 +572,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.44: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, 
$vgpr60, $vgpr63, $vgpr59 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF @@ -590,7 +592,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.45.Flow26: ; GFX90A-NEXT: successors: %bb.47(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, 
$vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc @@ -606,7 +608,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.46.bb48: ; 
GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc @@ 
-637,7 +639,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.47.Flow25: ; GFX90A-NEXT: successors: %bb.42(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, 
$sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr44_sgpr45, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc @@ -655,21 +657,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.48.bb63: ; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000) - ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 + ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.49: ; GFX90A-NEXT: successors: %bb.44(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, 
$vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1 ; GFX90A-NEXT: S_BRANCH %bb.44 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.50.bb68: ; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, 
$vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec ; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec @@ -678,7 +680,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.51: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 @@ -698,16 +700,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.52.bb80: ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, 
$sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: dead renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc + ; GFX90A-NEXT: dead renamable $sgpr15 
= S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr10 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr11, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: S_CBRANCH_SCC0 %bb.59, implicit killed $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.53: ; GFX90A-NEXT: successors: %bb.61(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, 
$vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1 @@ -726,7 +728,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.54.bb73: ; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, 
$sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76) ; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec @@ -751,14 +753,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.55.Flow29: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, 
$vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr62_sgpr63, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.45 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.56.bb90: ; GFX90A-NEXT: successors: 
%bb.60(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec ; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec @@ -778,7 +780,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.57: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec @@ -823,7 +825,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; 
GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3) - ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec ; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3) @@ -832,7 +834,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.59.bb85: ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, 
$vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec ; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec @@ -854,14 +856,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.60.Flow31: ; GFX90A-NEXT: successors: %bb.61(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.61.Flow30: ; GFX90A-NEXT: successors: %bb.55(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, 
$sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc @@ -873,7 +875,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.62.bb140: ; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, 
$sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, 
$vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -881,14 +883,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.63.Flow13: ; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, 
$sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.64.bb159: ; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, 
$vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr6, implicit $exec ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -897,21 +899,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.65.Flow10: ; GFX90A-NEXT: successors: %bb.66(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.66.Flow14: ; GFX90A-NEXT: successors: %bb.8(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, 
$sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec ; GFX90A-NEXT: S_BRANCH %bb.8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.67.bb161: ; GFX90A-NEXT: successors: %bb.65(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, 
$sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr27, killed $vgpr29, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = 
V_OR_B32_e32 killed $vgpr2, killed $vgpr33, implicit $exec @@ -930,7 +932,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.68.bb174: ; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, 
$sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $agpr0 = COPY killed renamable $vgpr14, implicit $exec ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 1, $vgpr32, implicit $exec @@ -947,14 +949,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.69.Flow: ; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, 
$vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, 
$vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.70.bb186: ; GFX90A-NEXT: successors: %bb.71(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY 
renamable $sgpr27, implicit $exec @@ -983,7 +985,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.71.Flow9: ; GFX90A-NEXT: successors: %bb.63(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, 
$vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr14 = COPY killed renamable $agpr0, implicit $exec @@ -991,7 +993,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.72.bb196: ; GFX90A-NEXT: successors: %bb.69(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, 
$vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, 
$vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr14, killed $vgpr24, implicit $exec ; GFX90A-NEXT: renamable $vgpr22 = V_OR_B32_e32 killed $vgpr2, killed $vgpr22, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll index fe0b0188d2d3..697576edef12 100644 --- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll @@ -305,6 +305,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v ; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 ; SDAG-NEXT: v_mov_b32_e32 v0, 2.0 +; SDAG-NEXT: ; implicit-def: $sgpr15 ; SDAG-NEXT: s_mov_b32 s14, s16 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] ; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 @@ -330,6 +331,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 ; GISEL-NEXT: v_mov_b32_e32 v0, 2.0 +; GISEL-NEXT: ; implicit-def: $sgpr15 ; GISEL-NEXT: s_mov_b32 s14, s16 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll index e2ca278d687b..ada817b40244 100644 --- a/llvm/test/CodeGen/AMDGPU/call-skip.ll +++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll @@ -5,6 +5,10 @@ ; A call should be skipped if all lanes are zero, since we don't know ; what side effects should be avoided inside the call. 
define hidden void @func() #1 { +; GCN-LABEL: func: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] ret void } @@ -85,19 +89,20 @@ define amdgpu_kernel void @if_call_kernel() #0 { ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 -; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc +; SDAG-NEXT: s_and_saveexec_b64 s[18:19], vcc ; SDAG-NEXT: s_cbranch_execz .LBB3_2 ; SDAG-NEXT: ; %bb.1: ; %call ; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-NEXT: s_mov_b32 s13, s15 ; SDAG-NEXT: s_getpc_b64 s[18:19] ; SDAG-NEXT: s_add_u32 s18, s18, func@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12 ; SDAG-NEXT: v_or_b32_e32 v31, v0, v2 ; SDAG-NEXT: s_mov_b32 s12, s14 -; SDAG-NEXT: s_mov_b32 s13, s15 ; SDAG-NEXT: s_mov_b32 s14, s16 +; SDAG-NEXT: ; implicit-def: $sgpr15 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19] ; SDAG-NEXT: .LBB3_2: ; %end ; SDAG-NEXT: s_endpgm @@ -111,19 +116,20 @@ define amdgpu_kernel void @if_call_kernel() #0 { ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[18:19], vcc ; GISEL-NEXT: s_cbranch_execz .LBB3_2 ; GISEL-NEXT: ; %bb.1: ; %call ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 ; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 +; GISEL-NEXT: s_mov_b32 s13, s15 ; GISEL-NEXT: s_getpc_b64 s[18:19] ; GISEL-NEXT: s_add_u32 s18, s18, func@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 ; GISEL-NEXT: s_mov_b32 s12, s14 -; GISEL-NEXT: s_mov_b32 s13, s15 ; GISEL-NEXT: s_mov_b32 s14, s16 +; GISEL-NEXT: ; implicit-def: $sgpr15 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GISEL-NEXT: .LBB3_2: ; %end ; GISEL-NEXT: s_endpgm diff 
--git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 38c20c7cf62d..45836ff81f77 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -184,21 +184,22 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 { ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_bitcmp1_b32 s12, 0 -; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0 -; GCN-NEXT: s_and_b64 vcc, exec, s[12:13] +; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-NEXT: s_cbranch_vccnz .LBB4_2 ; GCN-NEXT: ; %bb.1: ; %if.else ; GCN-NEXT: s_add_u32 s8, s8, 8 ; GCN-NEXT: s_addc_u32 s9, s9, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_getpc_b64 s[18:19] ; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12 ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 ; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: ; implicit-def: $sgpr15 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GCN-NEXT: s_branch .LBB4_3 ; GCN-NEXT: .LBB4_2: @@ -235,21 +236,22 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 { ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_bitcmp1_b32 s12, 0 -; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0 -; GCN-NEXT: s_and_b64 vcc, exec, s[12:13] +; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-NEXT: s_cbranch_vccnz .LBB5_2 ; GCN-NEXT: ; %bb.1: ; %if.else ; GCN-NEXT: s_add_u32 s8, s8, 8 ; GCN-NEXT: s_addc_u32 s9, s9, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_getpc_b64 s[18:19] ; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4 ; GCN-NEXT: 
s_addc_u32 s19, s19, func_v3f16@rel32@hi+12 ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 ; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 +; GCN-NEXT: ; implicit-def: $sgpr15 ; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GCN-NEXT: s_branch .LBB5_3 ; GCN-NEXT: .LBB5_2: diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 103ca48a7dc5..d6e11e6af6d8 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -386,6 +386,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -462,6 +463,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -528,6 +530,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -594,6 +597,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; 
GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -649,6 +653,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1 @@ -688,16 +693,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop @@ -751,6 +758,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -803,6 +811,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -885,6 +894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -962,6 +972,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -1023,6 +1034,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -1078,16 +1090,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; 
GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -1599,6 +1613,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1675,6 +1690,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -1741,6 +1757,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -1807,6 +1824,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-NEXT: 
s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -1862,6 +1880,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1 @@ -1901,16 +1920,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop @@ -1964,6 +1985,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; 
GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -2016,6 +2038,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -2098,6 +2121,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2175,6 +2199,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -2236,6 +2261,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2291,16 +2317,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, 
s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -2872,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -2948,6 +2977,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -3014,6 +3044,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: 
v_bfrev_b32_e32 v2, 1 @@ -3080,6 +3111,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3135,6 +3167,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1 @@ -3174,16 +3207,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop @@ -3237,6 +3272,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; 
GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -3289,6 +3325,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -3371,6 +3408,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3448,6 +3486,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -3509,6 +3548,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3564,16 +3604,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; 
GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -3641,6 +3683,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -3717,6 +3760,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -3783,6 +3827,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; 
GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3849,6 +3894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3904,6 +3950,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1 @@ -3943,16 +3990,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB6_1: ; %ComputeLoop @@ -4006,6 +4055,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -4058,6 +4108,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -4140,6 +4191,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4217,6 +4269,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -4278,6 +4331,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; 
GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4333,16 +4387,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -4913,6 +4969,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -4989,6 +5046,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -5055,6 +5113,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5121,6 +5180,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5176,6 +5236,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5228,16 +5289,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; 
GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB8_1: ; %ComputeLoop @@ -5304,6 +5367,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -5356,6 +5420,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -5438,6 +5503,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5515,6 +5581,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5576,6 +5643,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: 
s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5644,16 +5712,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -6165,6 +6235,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -6246,6 +6317,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -6315,6 +6387,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -6384,6 +6457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -6442,6 +6516,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -6497,16 +6572,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -6575,6 +6652,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -6630,6 +6708,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6729,6 +6808,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6818,6 +6898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -6891,6 +6972,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6973,16 +7055,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -7592,6 +7676,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -7673,6 +7758,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; 
GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -7742,6 +7828,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -7811,6 +7898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -7869,6 +7957,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -7924,16 +8013,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 
s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -8002,6 +8093,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -8057,6 +8149,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8156,6 +8249,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8245,6 +8339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; 
GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8318,6 +8413,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8400,16 +8496,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -9019,6 +9117,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -9100,6 +9199,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -9169,6 +9269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -9238,6 +9339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -9296,6 +9398,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -9351,16 +9454,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; 
GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -9429,6 +9534,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -9484,6 +9590,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -9583,6 +9690,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -9672,6 +9780,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; 
GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -9745,6 +9854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -9827,16 +9937,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -9928,6 +10040,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] 
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -10009,6 +10122,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -10078,6 +10192,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -10147,6 +10262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -10205,6 +10321,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -10260,16 +10377,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], 
s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -10338,6 +10457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -10393,6 +10513,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10492,6 +10613,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10581,6 +10703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: 
s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -10654,6 +10777,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10736,16 +10860,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -11355,6 +11481,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; 
GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -11436,6 +11563,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -11505,6 +11633,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -11574,6 +11703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -11632,6 +11762,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -11687,16 +11818,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; 
GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -11765,6 +11898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -11820,6 +11954,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -11919,6 +12054,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -12008,6 +12144,7 @@ define amdgpu_kernel void 
@global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -12081,6 +12218,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -12163,16 +12301,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll 
index 2160976599dd..957ff4766e70 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop @@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -797,6 +806,7 @@ define amdgpu_kernel void 
@global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; 
implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 @@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1578,6 +1596,7 @@ define amdgpu_kernel void 
@global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop @@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 
s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], 
s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 @@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: 
; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop @@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -2772,6 
+2808,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: 
s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 @@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -3690,6 +3735,7 @@ define 
amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; 
GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; 
GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 @@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; 
implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -5146,6 +5209,7 @@ define amdgpu_kernel void 
@global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; 
GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 @@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -6160,6 +6232,7 @@ define amdgpu_kernel void 
@global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; 
GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: 
s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index 029fb9c11834..97659df4f649 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: 
s_mov_b64 s[0:1], s[36:37] @@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 
0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop @@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 
s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 @@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, 
div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop @@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: 
s_or_saveexec_b64 s[0:1], -1 @@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 
v2, 0x7fc00000, v0, s0 @@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000 @@ -2653,16 +2686,18 @@ define 
amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop @@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], 
s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: 
s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0 @@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: 
; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4047,6 +4098,7 
@@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 
s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 @@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -5001,6 +5061,7 @@ define amdgpu_kernel void 
@global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; 
GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; 
GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 @@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 @@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 @@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -6467,6 +6545,7 @@ define amdgpu_kernel void 
@global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; 
GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 3250d95bb0b7..68e87b16c66f 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -446,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -522,6 +523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -588,6 +590,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; 
GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -654,6 +657,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -709,6 +713,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -761,16 +766,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop @@ -837,6 +844,7 @@ define amdgpu_kernel 
void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -889,6 +897,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -971,6 +980,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -1048,6 +1058,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -1109,6 +1120,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; 
GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -1177,16 +1189,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -1771,6 +1785,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1847,6 +1862,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -1913,6 +1929,7 @@ define amdgpu_kernel void 
@global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -1979,6 +1996,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -2034,6 +2052,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -2086,16 +2105,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; 
GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop @@ -2162,6 +2183,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -2214,6 +2236,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -2296,6 +2319,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2373,6 +2397,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -2434,6 +2459,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: 
s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -2502,16 +2528,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -3096,6 +3124,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -3172,6 +3201,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -3238,6 +3268,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3304,6 +3335,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3359,6 +3391,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -3411,16 +3444,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop @@ -3487,6 +3522,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -3539,6 +3575,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -3621,6 +3658,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3698,6 +3736,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -3759,6 +3798,7 @@ define amdgpu_kernel void 
@global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -3827,16 +3867,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -3917,6 +3959,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -3993,6 +4036,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: 
v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -4059,6 +4103,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -4125,6 +4170,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -4180,6 +4226,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -4232,16 +4279,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 
s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB6_1: ; %ComputeLoop @@ -4308,6 +4357,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -4360,6 +4410,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -4442,6 +4493,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4519,6 +4571,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -4580,6 +4633,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -4648,16 +4702,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -5241,6 +5297,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: 
s_mov_b64 s[0:1], s[36:37] @@ -5317,6 +5374,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: s_mov_b64 s[0:1], exec @@ -5383,6 +5441,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5449,6 +5508,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5504,6 +5564,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1 @@ -5556,16 +5617,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; 
GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: .LBB8_1: ; %ComputeLoop @@ -5632,6 +5695,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -5684,6 +5748,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -5766,6 +5831,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5843,6 +5909,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], 
s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5904,6 +5971,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5972,16 +6040,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0 @@ -6493,6 +6563,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; 
GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -6574,6 +6645,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -6643,6 +6715,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -6712,6 +6785,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -6770,6 +6844,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -6825,16 +6900,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; 
GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -6903,6 +6980,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -6958,6 +7036,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -7057,6 +7136,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -7146,6 +7226,7 @@ define amdgpu_kernel void 
@global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -7219,6 +7300,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -7301,16 +7383,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -7919,6 +8003,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: 
v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -8000,6 +8085,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -8069,6 +8155,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -8138,6 +8225,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -8196,6 +8284,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -8251,16 +8340,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -8329,6 +8420,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -8384,6 +8476,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8483,6 +8576,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; 
GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8572,6 +8666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8645,6 +8740,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8727,16 +8823,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -9346,6 +9444,7 @@ define 
amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -9427,6 +9526,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -9496,6 +9596,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -9565,6 +9666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -9623,6 +9725,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -9678,16 +9781,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; 
GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -9756,6 +9861,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -9811,6 +9917,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -9910,6 +10017,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: 
$sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -9999,6 +10107,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -10072,6 +10181,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10154,16 +10264,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; 
GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -10255,6 +10367,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -10336,6 +10449,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -10405,6 +10519,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -10474,6 +10589,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -10532,6 +10648,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 
0 @@ -10587,16 +10704,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -10665,6 +10784,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -10720,6 +10840,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10819,6 +10940,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; 
GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -10908,6 +11030,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -10981,6 +11104,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -11063,16 +11187,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 
s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 @@ -11681,6 +11807,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -11762,6 +11889,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -11831,6 +11959,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 +; GFX1064-NEXT: ; implicit-def: $sgpr15 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 @@ -11900,6 +12029,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr15 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 @@ -11958,6 +12088,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b32 s32, 0 +; GFX1164-NEXT: ; implicit-def: $sgpr15 
; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 @@ -12013,16 +12144,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: s_getpc_b64 s[4:5] ; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-NEXT: s_mov_b32 s16, s15 +; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b32 s12, s13 -; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b32 s13, s14 -; GFX1132-NEXT: s_mov_b32 s14, s15 +; GFX1132-NEXT: s_mov_b32 s14, s16 ; GFX1132-NEXT: s_mov_b32 s32, 0 +; GFX1132-NEXT: ; implicit-def: $sgpr15 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-NEXT: v_mov_b32_e32 v4, 0 ; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo @@ -12091,6 +12224,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 +; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] @@ -12146,6 +12280,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 +; GFX9-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -12245,6 +12380,7 @@ define amdgpu_kernel 
void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -12334,6 +12470,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -12407,6 +12544,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 0 +; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -12489,16 +12627,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5] ; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 ; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12 +; GFX1132-DPP-NEXT: s_mov_b32 s16, s15 +; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 -; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b32 s13, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s14, s15 +; GFX1132-DPP-NEXT: s_mov_b32 s14, s16 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 0 +; 
GFX1132-DPP-NEXT: ; implicit-def: $sgpr15 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index d895a75de6e8..7d8a0b70d5f5 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -57,18 +57,19 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_mov_b64 s[18:19], s[4:5] ; GFX11-NEXT: v_mov_b32_e32 v31, v0 -; GFX11-NEXT: s_load_b32 s24, s[18:19], 0x24 +; GFX11-NEXT: s_load_b32 s25, s[18:19], 0x24 +; GFX11-NEXT: s_mov_b32 s17, s15 ; GFX11-NEXT: s_mov_b32 s12, s13 ; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31 +; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX11-NEXT: s_mov_b32 s20, 0 ; GFX11-NEXT: s_mov_b32 s0, -1 -; GFX11-NEXT: s_mov_b32 s17, exec_lo +; GFX11-NEXT: s_mov_b32 s24, exec_lo ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0 +; GFX11-NEXT: v_mul_lo_u32 v0, s25, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-NEXT: s_cbranch_execz .LBB2_13 @@ -76,7 +77,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_load_b128 s[20:23], s[18:19], 0x2c ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_bitcmp1_b32 s21, 0 -; GFX11-NEXT: s_cselect_b32 s25, -1, 0 +; GFX11-NEXT: s_cselect_b32 s26, -1, 0 ; GFX11-NEXT: s_bitcmp0_b32 s21, 0 ; GFX11-NEXT: s_mov_b32 s21, 0 ; GFX11-NEXT: s_cbranch_scc0 .LBB2_3 @@ -86,13 +87,14 @@ define amdgpu_kernel void 
@f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_mov_b32 s13, s14 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s26, s14 -; GFX11-NEXT: s_mov_b32 s14, s15 +; GFX11-NEXT: s_mov_b32 s15, s14 +; GFX11-NEXT: s_mov_b32 s14, s17 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_mov_b32 s14, s26 +; GFX11-NEXT: s_mov_b32 s14, s15 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_cbranch_execz .LBB2_4 ; GFX11-NEXT: s_branch .LBB2_12 @@ -126,11 +128,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_mul_i32 s0, s0, s22 ; GFX11-NEXT: s_mul_i32 s0, s0, s20 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_or_b32 s0, s24, s0 +; GFX11-NEXT: s_or_b32 s0, s25, s0 ; GFX11-NEXT: s_lshl_b64 s[22:23], s[0:1], 1 ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: global_load_u16 v1, v0, s[22:23] -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s26 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo @@ -151,7 +153,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_and_b32 s1, s1, 1 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s0 ; GFX11-NEXT: s_cselect_b32 s9, -1, 0 -; GFX11-NEXT: s_and_b32 s16, s8, exec_lo +; GFX11-NEXT: s_and_b32 s15, s8, exec_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s9 ; GFX11-NEXT: v_readfirstlane_b32 s9, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) @@ -181,8 +183,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_and_b32 s20, s2, exec_lo ; GFX11-NEXT: 
s_or_not1_b32 s0, s21, exec_lo ; GFX11-NEXT: .LBB2_13: ; %Flow9 -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17 -; GFX11-NEXT: s_and_saveexec_b32 s17, s0 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s24 +; GFX11-NEXT: s_and_saveexec_b32 s21, s0 ; GFX11-NEXT: s_cbranch_execz .LBB2_15 ; GFX11-NEXT: ; %bb.14: ; %bb43 ; GFX11-NEXT: s_add_u32 s8, s18, 0x58 @@ -192,12 +194,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12 ; GFX11-NEXT: s_mov_b32 s13, s14 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s14, s15 +; GFX11-NEXT: s_mov_b32 s14, s17 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_or_b32 s20, s20, exec_lo ; GFX11-NEXT: .LBB2_15: ; %Flow14 -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s21 ; GFX11-NEXT: s_and_saveexec_b32 s0, s20 ; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock ; GFX11-NEXT: ; divergent unreachable diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 0e250baa8609..15f2dc086bdd 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -77,6 +77,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v41, v0 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -109,6 +110,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; 
implicit-def: $sgpr15 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4 ; CHECK-NEXT: v_mov_b32_e32 v1, 12 @@ -194,6 +196,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 @@ -220,6 +223,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v59, 1, v57 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v59 @@ -246,6 +250,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v59, 2, v57 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v59 @@ -272,6 +277,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v57 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 @@ -322,6 +328,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: 
ds_write_b32 v0, v57 @@ -356,6 +363,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s4, exec_lo @@ -381,6 +389,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41 ; CHECK-NEXT: v_add_co_ci_u32_e64 v0, null, 0, v1, vcc_lo @@ -441,6 +450,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v73 ; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0 @@ -501,6 +511,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_branch .LBB0_27 ; CHECK-NEXT: .LBB0_33: @@ -821,6 +832,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mul_lo_u32 v44, v0, 14 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -854,6 +866,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: 
s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4 ; CHECK-NEXT: v_mov_b32_e32 v1, 12 @@ -944,6 +957,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43 +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v56 diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll index 9585c486aeb9..263298d1cd1c 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll @@ -402,6 +402,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr add ; GFX908: ; %bb.0: ; %bb ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX908-NEXT: v_mov_b32_e32 v32, 0 +; GFX908-NEXT: ; implicit-def: $agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111_agpr112_agpr113_agpr114_agpr115_agpr116_agpr117_agpr118_agpr119_agpr120_agpr121_agpr122_agpr123_agpr124_agpr125_agpr126_agpr127_agpr128_agpr129_agpr130_agpr131 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a[100:131] ; GFX908-NEXT: ;;#ASMEND @@ -638,6 +639,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg) ; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX908-NEXT: s_mov_b32 s32, 0 ; GFX908-NEXT: v_mov_b32_e32 v40, 0 +; GFX908-NEXT: ; implicit-def: $sgpr15 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: global_load_dwordx4 v[28:31], v40, s[34:35] offset:112 @@ -902,6 +904,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace( ; GFX908-NEXT: s_mov_b64 s[0:1], s[52:53] ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX908-NEXT: 
s_mov_b64 s[2:3], s[54:55] +; GFX908-NEXT: ; implicit-def: $sgpr15 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: .LBB6_2: ; %bb3 diff --git a/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir new file mode 100644 index 000000000000..788065fd1391 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir @@ -0,0 +1,49 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 + +# Ensure processing an IMPLICIT_DEF of a physreg updates all uses +# before removing the IMPLICIT_DEF. -verify-machineinstrs will +# fail otherwise. + +# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s + +--- + +name: implicit_def_multiple_use +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: implicit_def_multiple_use + ; CHECK: $vgpr1_lo16 = COPY undef $vgpr0_hi16 + ; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_hi16 + $vgpr1_hi16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# IMPLICIT_DEF processing will not search across basic blocks for uses. IMPLICIT_DEF must not be deleted. +--- + +name: implicit_def_cannot_find_all_uses +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: implicit_def_cannot_find_all_uses + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr1_lo16 = COPY undef $vgpr0_hi16 + ; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16 + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.0, implicit undef $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_hi16 + $vgpr1_hi16 = COPY $vgpr0_lo16 + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.0, implicit $scc + bb.1: +...
diff --git a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll index b0c672d3c55d..d7a6cce1fea3 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll +++ b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll @@ -30,6 +30,7 @@ define amdgpu_kernel void @main(i1 %arg, ptr %ptr, ptr addrspace(1) %ptr1, ptr a ; GFX950-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX950-NEXT: v_mov_b32_e32 v31, v0 ; GFX950-NEXT: s_mov_b32 s32, 0 +; GFX950-NEXT: ; implicit-def: $sgpr15 ; GFX950-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX950-NEXT: v_mov_b32_e32 v1, 0 ; GFX950-NEXT: v_lshl_add_u64 v[10:11], v[0:1], 3, s[38:39] diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index a2b0f4d56ebe..6084381da84f 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -27,6 +27,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -108,6 +109,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -178,6 +180,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: ; implicit-def: $sgpr15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0 
@@ -241,6 +244,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -362,6 +366,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 17, v0 @@ -489,6 +494,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: s_mov_b32 s32, 0 +; GFX900-NEXT: ; implicit-def: $sgpr15 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX900-NEXT: v_and_b32_e32 v1, 0xff, v0 @@ -599,6 +605,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: ; implicit-def: $sgpr15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 17, v0 @@ -711,6 +718,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_mov_b32 s32, 0 +; GFX90A-NEXT: ; implicit-def: $sgpr15 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX90A-NEXT: v_and_b32_e32 v1, 0xff, v0 @@ -812,6 +820,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_load_b64 
s[34:35], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 17, v0 @@ -1036,6 +1045,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -1122,6 +1132,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -1179,6 +1190,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: ; implicit-def: $sgpr15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -1236,6 +1248,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -1353,6 +1366,7 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 
v1, 7, v0 @@ -1578,6 +1592,7 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -1786,6 +1801,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: $sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0 @@ -1849,6 +1865,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0 @@ -1908,6 +1925,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: ; implicit-def: $sgpr15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0 @@ -1958,6 +1976,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0 @@ -2058,6 +2077,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 +; GFX8-NEXT: ; implicit-def: 
$sgpr15 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -2139,6 +2159,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: ; implicit-def: $sgpr15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -2208,6 +2229,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: ; implicit-def: $sgpr15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0 @@ -2275,6 +2297,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 +; GFX11-NEXT: ; implicit-def: $sgpr15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index d33e94809b32..db04d8c1c267 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -25,6 +25,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12 ; MUBUF-NEXT: s_mov_b32 s32, 0xc0000 +; MUBUF-NEXT: ; implicit-def: $sgpr15 ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v0, s0 ; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -61,6 +62,7 @@ define amdgpu_kernel 
void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12 +; FLATSCR-NEXT: ; implicit-def: $sgpr15 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: v_mov_b32_e32 v0, s2 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -89,6 +91,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4 ; MUBUF11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12 ; MUBUF11-NEXT: s_movk_i32 s32, 0x6000 +; MUBUF11-NEXT: ; implicit-def: $sgpr15 ; MUBUF11-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF11-NEXT: v_mov_b32_e32 v0, s2 ; MUBUF11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -116,6 +119,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; FLATSCR11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4 ; FLATSCR11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12 ; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000 +; FLATSCR11-NEXT: ; implicit-def: $sgpr15 ; FLATSCR11-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR11-NEXT: v_mov_b32_e32 v0, s2 ; FLATSCR11-NEXT: s_swappc_b64 s[30:31], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir new file mode 100644 index 000000000000..9eb52fe4f82a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir @@ -0,0 +1,42 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 + +# Ensure processing an IMPLICIT_DEF of a physreg handles subreg definitions +# and super-reg uses correctly. + + +# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s + +# Must not add undef to use of implicit-def because def is partially redefined. 
+--- +name: impdef_subreg_def +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: impdef_subreg_def + ; CHECK: $sgpr0_sgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0 + ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0 + $sgpr0_sgpr1 = IMPLICIT_DEF + $sgpr0 = S_MOV_B32 0 + $sgpr2_sgpr3 = COPY $sgpr0_sgpr1 + S_ENDPGM 0 +... + +# Must not add undef to use of implicit-def because use is larger than implicit definition. +--- +name: impdef_superreg_use +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: impdef_superreg_use + ; CHECK: $sgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr1 = S_MOV_B32 0 + ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0 + $sgpr0 = IMPLICIT_DEF + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = COPY $sgpr0_sgpr1 + S_ENDPGM 0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index 251908b1b0f9..03b653782e5c 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53] -; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5] +; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[4:5] ; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 @@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] +; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[8:9] ; GLOBALNESS1-NEXT: 
v_cmp_ne_u32_e64 s[8:9], 1, v1 ; GLOBALNESS1-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 @@ -127,21 +127,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2 ; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47] -; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40 ; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 ; GLOBALNESS1-NEXT: flat_load_dword v56, v[46:47] -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS1-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[70:71] @@ -241,13 +242,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_add_u32 s70, s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s71, s39, 0 +; GLOBALNESS1-NEXT: s_add_u32 s70, s48, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s71, s49, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, 
s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS1-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] @@ -257,7 +258,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] @@ -266,6 +267,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[58:59], off +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14 @@ -275,7 +277,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_branch .LBB1_14 ; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0 +; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[48:49], 0x0 ; GLOBALNESS1-NEXT: v_readlane_b32 s70, v57, 8 ; GLOBALNESS1-NEXT: v_readlane_b32 s8, v57, 10 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 @@ -307,36 +309,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31 ; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, 
s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33 ; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock ; @@ -350,7 +354,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) 
; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53] -; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5] +; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[4:5] ; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 @@ -379,7 +383,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[8:9] ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 ; GLOBALNESS0-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 @@ -438,21 +442,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2 ; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47] -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40 ; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 ; GLOBALNESS0-NEXT: flat_load_dword v56, v[46:47] -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS0-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 ; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15 ; 
GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[84:85] @@ -553,13 +558,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_add_u32 s84, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s85, s39, 0 +; GLOBALNESS0-NEXT: s_add_u32 s84, s48, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s85, s49, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS0-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] @@ -569,7 +574,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] @@ -578,6 +583,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[58:59], off +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14 @@ -617,36 +623,38 @@ define amdgpu_kernel void @kernel(ptr 
addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31 ; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 ; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33 ; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 ; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: .LBB1_33: ; 
%UnifiedUnreachableBlock bb: diff --git a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll index 3558c4e1d0c8..485d7d3e75b6 100644 --- a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll +++ b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll @@ -66,6 +66,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x ; CHECK-NEXT: flat_store_dwordx2 v[44:45], v[58:59] ; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[62:63] ; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55] ; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[46:47] glc ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll index eba9faae5fdb..8206fd0b4fe1 100644 --- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll @@ -150,6 +150,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn ; CHECK-NEXT: extsh r9, r3 ; CHECK-NEXT: extsw r6, r28 ; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: # implicit-def: $f2 ; CHECK-NEXT: li r7, 0 ; CHECK-NEXT: std r30, 104(r1) ; CHECK-NEXT: std r29, 96(r1) @@ -308,6 +309,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn ; CHECK-BE-NEXT: extsh r9, r3 ; CHECK-BE-NEXT: extsw r6, r28 ; CHECK-BE-NEXT: li r5, 0 +; CHECK-BE-NEXT: # implicit-def: $f2 ; CHECK-BE-NEXT: li r7, 0 ; CHECK-BE-NEXT: std r30, 120(r1) ; CHECK-BE-NEXT: std r29, 112(r1) diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll index 3803ac82458b..81b640d17c62 100644 --- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll +++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll @@ -24,14 +24,22 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: addi s0, sp, 32 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; 
CHECK-NEXT: .cfi_remember_state -; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .Ltmp0: # EH_LABEL ; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: # implicit-def: $x10 +; CHECK-NEXT: # implicit-def: $x11 +; CHECK-NEXT: # implicit-def: $x12 +; CHECK-NEXT: # implicit-def: $x13 +; CHECK-NEXT: # implicit-def: $x14 +; CHECK-NEXT: # implicit-def: $x15 +; CHECK-NEXT: # implicit-def: $x16 +; CHECK-NEXT: # implicit-def: $x17 ; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi ; CHECK-NEXT: addi sp, sp, 32 -; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .Ltmp1: # EH_LABEL ; CHECK-NEXT: # %bb.1: # %try.cont.unreachable ; CHECK-NEXT: .LBB0_2: # %lpad -; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: .Ltmp2: # EH_LABEL ; CHECK-NEXT: sext.w a1, a1 ; CHECK-NEXT: li a2, 1 ; CHECK-NEXT: bne a1, a2, .LBB0_4 diff --git a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll index 17027e2e4e5b..8e2af5b44b3a 100644 --- a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll +++ b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -13,6 +13,7 @@ define i32 @test(i32 %n) nounwind { ; CHECK-NEXT: .LBB0_1: @ %bb ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: bl f +; CHECK-NEXT: @ implicit-def: $r1 ; CHECK-NEXT: bl g ; CHECK-NEXT: subs r4, #1 ; CHECK-NEXT: bne .LBB0_1 @@ -58,6 +59,7 @@ define i32 @test_dead_cycle(i32 %n) nounwind { ; CHECK-NEXT: @ %bb.2: @ %bb1 ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: bl f +; CHECK-NEXT: @ implicit-def: $r1 ; CHECK-NEXT: bl g ; CHECK-NEXT: .LBB1_3: @ %bb2 ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 diff --git a/llvm/test/CodeGen/X86/issue76416.ll b/llvm/test/CodeGen/X86/issue76416.ll index 7193e54a6ad5..14786e5040da 100644 --- a/llvm/test/CodeGen/X86/issue76416.ll +++ b/llvm/test/CodeGen/X86/issue76416.ll @@ -26,6 +26,7 @@ define dso_local void @vga_load_state() #0 { ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: # %for.cond1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: # implicit-def: $edx ; CHECK-NEXT: 
#APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movq vga_load_state_p(%rip), %rax diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll index bd997d164776..09d1dd9ccef9 100644 --- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll +++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll @@ -24,6 +24,7 @@ define void @PR24199(i32 %a0) { ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: # implicit-def: $rdi ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload diff --git a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir new file mode 100644 index 000000000000..051157dc015b --- /dev/null +++ b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir @@ -0,0 +1,20 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# Ensure processing an IMPLICIT_DEF of a physreg updates all uses +# before removing the IMPLICIT_DEF. -verify-machineinstrs will +# fail otherwise. + +# RUN: llc -mtriple=x86_64-- -x mir -stop-after processimpdefs < %s -verify-machineinstrs | FileCheck %s +--- +name: implicit_def +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: implicit_def + ; CHECK: MOV32mr $rip, 1, $noreg, 12, $noreg, undef $eax + ; CHECK-NEXT: RET 0, undef $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: .1.entry: + $eax = IMPLICIT_DEF + MOV32mr $rip, 1, $noreg, 12, $noreg, $eax + RET 0, $eax +... 
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index c24823538aa1..69a6cdb7081e 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -67,10 +67,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) { ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jae LBB0_8 ; CHECK-NEXT: ## %bb.7: ## %for.body.lr.ph +; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: ## implicit-def: $rdi ; CHECK-NEXT: movq %rdx, %rbx ; CHECK-NEXT: movl $512, %edx ## imm = 0x200 ; CHECK-NEXT: movl $32, %esi -; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: callq _memset ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rdx @@ -156,6 +157,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jne LBB0_31 ; CHECK-NEXT: ## %bb.30: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2 +; CHECK-NEXT: ## implicit-def: $edi ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune ; CHECK-NEXT: movb $1, %sil @@ -258,6 +260,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) { ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload ; CHECK-NEXT: LBB0_47: ## %if.then1477 +; CHECK-NEXT: ## implicit-def: $edi +; CHECK-NEXT: ## implicit-def: $rsi ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write ; CHECK-NEXT: subq %rbx, %r14 diff --git a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll index ea1ca5190813..9663ab995d64 100644 --- a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll +++ b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll @@ -14,7 +14,7 @@ ; The new code choses %ebp as the split candidate as it has lower spill cost. 
; Make sure the split behaves as expected -; CHECK: RS_Split Cascade 1 +; CHECK: RS_Split Cascade 0 ; CHECK-NOT: $eax static = ; CHECK: $eax no positive bundles ; CHECK-NEXT: $ecx no positive bundles diff --git a/llvm/test/CodeGen/X86/statepoint-two-results.ll b/llvm/test/CodeGen/X86/statepoint-two-results.ll index 4993c292dc55..82467841910b 100644 --- a/llvm/test/CodeGen/X86/statepoint-two-results.ll +++ b/llvm/test/CodeGen/X86/statepoint-two-results.ll @@ -8,6 +8,7 @@ define void @quux() gc "statepoint-example" { ; CHECK: # %bb.0: # %bb1 ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: # implicit-def: $rdi ; CHECK-NEXT: movl $4, %esi ; CHECK-NEXT: callq wombat@PLT ; CHECK-NEXT: .Ltmp0: diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll index ef542e5b1427..f08b388c3dc5 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll @@ -183,6 +183,11 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr ; CHECK-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: dead $edi = IMPLICIT_DEF + ; CHECK-NEXT: dead $rsi = IMPLICIT_DEF + ; CHECK-NEXT: dead $edx = IMPLICIT_DEF + ; CHECK-NEXT: dead $ecx = IMPLICIT_DEF + ; CHECK-NEXT: dead $r8d = IMPLICIT_DEF ; CHECK-NEXT: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s64) on %stack.0) ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; 
CHECK-NEXT: EH_LABEL diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 72e4fe410e26..f3e6d8b5489c 100644 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -109,6 +109,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_12 ; CHECK-NEXT: # %bb.2: # %if.end50 +; CHECK-NEXT: # implicit-def: $rsi ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %r15, %rdx ; CHECK-NEXT: callq memcpy@PLT @@ -170,6 +171,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3 ; CHECK-NEXT: .LBB1_11: # %if.then99.i ; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi ; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: # implicit-def: $esi ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq cli_dbgmsg@PLT ; CHECK-NEXT: .LBB1_12: # %cleanup