[MachineScheduler] Fix physreg dependencies of ExitSU (#123541)

Providing the correct operand index allows addPhysRegDataDeps to compute
the correct latency.

Pull Request: https://github.com/llvm/llvm-project/pull/123541
This commit is contained in:
Sergei Barannikov 2025-02-01 20:40:50 +03:00 committed by GitHub
parent 15336823ad
commit ff9c041d96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
58 changed files with 1315 additions and 1242 deletions

View File

@ -209,13 +209,25 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
ExitSU.setInstr(ExitMI);
// Add dependencies on the defs and uses of the instruction.
if (ExitMI) {
const MCInstrDesc &MIDesc = ExitMI->getDesc();
for (const MachineOperand &MO : ExitMI->all_uses()) {
unsigned OpIdx = MO.getOperandNo();
Register Reg = MO.getReg();
if (Reg.isPhysical()) {
// addPhysRegDataDeps uses the provided operand index to retrieve
// the operand use cycle from the scheduling model. If the operand
// is "fake" (e.g., an operand of a call instruction used to pass
// an argument to the called function), the scheduling model may not
// have an entry for it. If this is the case, pass -1 as operand index,
// which will cause addPhysRegDataDeps to add an artificial dependency.
// FIXME: Using hasImplicitUseOfPhysReg here is inaccurate as it misses
// aliases. When fixing, make sure to update addPhysRegDataDeps, too.
bool IsRealUse = OpIdx < MIDesc.getNumOperands() ||
MIDesc.hasImplicitUseOfPhysReg(Reg);
for (MCRegUnit Unit : TRI->regunits(Reg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
Uses.insert(PhysRegSUOper(&ExitSU, IsRealUse ? OpIdx : -1, Unit));
} else if (Reg.isVirtual() && MO.readsReg()) {
addVRegUseDeps(&ExitSU, MO.getOperandNo());
addVRegUseDeps(&ExitSU, OpIdx);
}
}
}

View File

@ -15,12 +15,12 @@ define ptr addrspace(1) @call_assert_align() {
; CHECK-NEXT: v_writelane_b32 v40, s16, 2
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
@ -45,11 +45,11 @@ define ptr addrspace(1) @tail_call_assert_align() {
; CHECK-LABEL: tail_call_assert_align:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_setpc_b64 s[16:17]
entry:
%call = tail call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null)

View File

@ -44,8 +44,8 @@ define amdgpu_kernel void @kernel_caller_stack() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_add_u32 s2, s32, 16
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
@ -239,11 +239,11 @@ define void @func_caller_stack() {
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
; MUBUF-NEXT: v_mov_b32_e32 v0, 12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
@ -274,15 +274,15 @@ define void @func_caller_stack() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_add_u32 s0, s32, 16
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
@ -312,10 +312,10 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s4, 2
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_waitcnt vmcnt(1)
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
; MUBUF-NEXT: s_waitcnt vmcnt(1)
@ -394,8 +394,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
; FLATSCR-NEXT: v_add_u32_e32 v3, 8, v0
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
; FLATSCR-NEXT: s_add_u32 s0, s32, 8
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: s_add_u32 s2, s32, 56
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32

View File

@ -191,9 +191,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: s_mov_b32 s6, -1
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB3_6
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader

View File

@ -387,8 +387,8 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
; OLD_RBS: ; %bb.0: ; %A
; OLD_RBS-NEXT: s_mov_b32 s0, 0
; OLD_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT: s_mov_b32 s0, 0
; OLD_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
; OLD_RBS-NEXT: ; %bb.1: ; %B
; OLD_RBS-NEXT: s_mov_b32 s0, 1

View File

@ -25,10 +25,10 @@ define void @parent_func_missing_inputs() #0 {
; FIXEDABI-NEXT: v_writelane_b32 v40, s16, 2
; FIXEDABI-NEXT: s_addk_i32 s32, 0x400
; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
; FIXEDABI-NEXT: s_getpc_b64 s[16:17]
; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
@ -49,21 +49,21 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s9
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-SDAG-NEXT: s_mov_b32 s14, s8
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 s12, s6
; FIXEDABI-SDAG-NEXT: s_mov_b32 s13, s7
; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-SDAG-NEXT: s_endpgm
;
@ -71,21 +71,21 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
; FIXEDABI-GISEL: ; %bb.0:
; FIXEDABI-GISEL-NEXT: s_add_i32 s4, s4, s9
; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s9
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-GISEL-NEXT: s_mov_b32 s14, s8
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 s12, s6
; FIXEDABI-GISEL-NEXT: s_mov_b32 s13, s7
; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-GISEL-NEXT: s_endpgm
call void @requires_all_inputs()

View File

@ -1286,9 +1286,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s4, s6
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
@ -1412,9 +1412,10 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
@ -1540,9 +1541,10 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB2_2
@ -3129,8 +3131,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s8, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s7, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s6
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB5_2
@ -4839,9 +4841,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s4, s6
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB8_2
@ -4965,9 +4967,10 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB8_2
@ -5093,9 +5096,10 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB8_2
@ -6715,8 +6719,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s8, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s7, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s6
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB11_2

View File

@ -919,9 +919,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
@ -1030,9 +1030,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
@ -2630,8 +2630,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s6, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s3, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB6_2
@ -2812,8 +2812,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB6_2
@ -3301,8 +3301,8 @@ define amdgpu_kernel void @add_i64_varying_nouse() {
; GFX1032_DPP-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3
; GFX1032_DPP-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX1032_DPP-NEXT: v_mbcnt_lo_u32_b32 v9, exec_lo, 0
; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX1032_DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v2
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
@ -4341,9 +4341,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB10_2
@ -4452,9 +4452,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB10_2
@ -6075,8 +6075,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s6, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s3, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB14_2
@ -6257,8 +6257,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB14_2
@ -6757,9 +6757,9 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB15_2
@ -6868,9 +6868,10 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB15_2
@ -7464,8 +7465,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB16_2
@ -7621,8 +7622,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB16_2
@ -8122,9 +8123,9 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB17_2
@ -8233,9 +8234,9 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB17_2
@ -8828,8 +8829,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB18_2
@ -8985,8 +8986,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB18_2
@ -9486,9 +9487,9 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB19_2
@ -9597,9 +9598,9 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB19_2
@ -10192,8 +10193,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB20_2
@ -10349,8 +10350,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB20_2
@ -10849,9 +10850,9 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB21_2
@ -10960,9 +10961,10 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB21_2
@ -11967,8 +11969,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB23_2
@ -12181,8 +12183,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB23_2
@ -12682,9 +12684,9 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB24_2
@ -12793,9 +12795,10 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB24_2
@ -13800,8 +13803,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB26_2
@ -14014,8 +14017,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB26_2
@ -14516,9 +14519,9 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB27_2
@ -14627,9 +14630,9 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB27_2
@ -15625,8 +15628,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB29_2
@ -15833,8 +15836,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB29_2
@ -16334,9 +16337,9 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB30_2
@ -16445,9 +16448,10 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB30_2
@ -17442,8 +17446,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB32_2
@ -17650,8 +17654,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB32_2

View File

@ -43,11 +43,11 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i8_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i8_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -103,11 +103,11 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -163,11 +163,11 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -223,12 +223,12 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -284,12 +284,12 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -345,13 +345,13 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s19, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -407,14 +407,14 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s20, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -470,6 +470,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s24, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[24:25]
; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
@ -479,9 +482,6 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
; GFX9-NEXT: s_mov_b32 s18, s22
; GFX9-NEXT: s_mov_b32 s19, s23
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[24:25]
; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -537,11 +537,11 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -597,11 +597,11 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -657,11 +657,11 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -717,12 +717,12 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -778,11 +778,11 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2f16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -839,11 +839,11 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -899,12 +899,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -960,12 +960,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1021,12 +1021,12 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1082,12 +1082,12 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1143,11 +1143,11 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p3_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p3_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1203,14 +1203,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
; GFX9-NEXT: v_writelane_b32 v40, s20, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[20:21]
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1266,12 +1266,12 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[18:19]
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1327,15 +1327,15 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
; GFX9-NEXT: v_writelane_b32 v40, s21, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[22:23]
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: s_mov_b32 s16, s20
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[22:23]
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1391,6 +1391,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 vcc
; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
@ -1405,9 +1408,6 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: s_mov_b32 s23, s27
; GFX9-NEXT: s_mov_b32 s24, s28
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 vcc
; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], vcc
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
@ -1465,6 +1465,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
; GFX9-NEXT: v_writelane_b32 v40, s21, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_getpc_b64 s[22:23]
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s7
; GFX9-NEXT: s_mov_b32 s2, s6
; GFX9-NEXT: s_mov_b32 s1, s5
@ -1480,9 +1483,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
; GFX9-NEXT: s_mov_b32 s15, s19
; GFX9-NEXT: s_mov_b32 s16, s20
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[22:23]
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0

File diff suppressed because it is too large Load Diff

View File

@ -102,10 +102,10 @@ define hidden void @void_func_void_clobber_vcc() #2 {
}
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
; GCN: s_mov_b64 s[34:35], vcc
; GCN-NEXT: s_getpc_b64
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_mov_b64 s[34:35], vcc
; GCN-NEXT: s_swappc_b64
; GCN: s_mov_b64 vcc, s[34:35]
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
@ -142,21 +142,27 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
; FIXME: What is the expected behavior for reserved registers here?
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; GCN-NOT: s33
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; GCN-NOT: s33
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s33
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
@ -168,20 +174,20 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34
; GCN: s_mov_b32 s32, 0
; GCN-NOT: s34
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def s34
; GCN-NEXT: ;;#ASMEND
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def s34
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s34
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
@ -200,19 +206,19 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}}
; GCN-NOT: v32
; GCN: s_mov_b32 s32, 0
; GCN-NOT: v40
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def v40
; GCN-NEXT: ;;#ASMEND
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def v40
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: v40
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
@ -255,10 +261,10 @@ define hidden void @void_func_void_clobber_s34() #2 {
}
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
@ -267,10 +273,10 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
}
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s -check-prefix=GISEL
; Check for optimizing the passed implicit workitem ID based on the
; required group size. This should avoid a few bit packing operations.
@ -13,15 +13,30 @@ define amdgpu_kernel void @known_x_0(ptr addrspace(1) %out) !reqd_work_group_siz
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 20, v2
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 20, v2
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_x_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: v_lshlrev_b32_e32 v0, 20, v2
; GISEL-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}
@ -34,13 +49,27 @@ define amdgpu_kernel void @known_y_0(ptr addrspace(1) %out) !reqd_work_group_siz
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_lshl_or_b32 v31, v2, 20, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_lshl_or_b32 v31, v2, 20, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_y_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: v_lshl_or_b32 v31, v2, 20, v0
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}
@ -53,13 +82,27 @@ define amdgpu_kernel void @known_z_0(ptr addrspace(1) %out) !reqd_work_group_siz
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_z_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: v_lshl_or_b32 v31, v1, 10, v0
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}
@ -72,13 +115,27 @@ define amdgpu_kernel void @known_yz_0(ptr addrspace(1) %out) !reqd_work_group_si
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_yz_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: v_mov_b32_e32 v31, v0
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}
@ -91,13 +148,27 @@ define amdgpu_kernel void @known_xz_0(ptr addrspace(1) %out) !reqd_work_group_si
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v31, 10, v1
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v31, 10, v1
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_xz_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: v_lshlrev_b32_e32 v31, 10, v1
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}
@ -111,13 +182,27 @@ define amdgpu_kernel void @known_xyz_0(ptr addrspace(1) %out) !reqd_work_group_s
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v31, 0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v31, 0
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: known_xyz_0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GISEL-NEXT: s_add_u32 s0, s0, s17
; GISEL-NEXT: s_addc_u32 s1, s1, 0
; GISEL-NEXT: s_getpc_b64 s[4:5]
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
; GISEL-NEXT: v_mov_b32_e32 v31, 0
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
call void @callee()
ret void
}

View File

@ -13,11 +13,11 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s6
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
%vgpr = load volatile i32, ptr addrspace(3) %ptr
@ -33,16 +33,16 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
store i32 0, ptr addrspace(1) %ptr
@ -55,16 +55,16 @@ define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #
; GCN-LABEL: call_no_wait_after_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: global_store_dword v40, v40, s[34:35]
@ -78,16 +78,16 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
; GCN-LABEL: call_no_wait_after_call_return_val:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: global_store_dword v40, v0, s[34:35]

View File

@ -125,12 +125,12 @@ define void @callee_with_stack_and_call() #0 {
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_getpc_b64 s[16:17]
; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
@ -155,12 +155,12 @@ define void @callee_with_stack_and_call() #0 {
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
@ -196,10 +196,10 @@ define void @callee_no_stack_with_call() #0 {
; MUBUF-NEXT: v_writelane_b32 v40, s16, 2
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_getpc_b64 s[16:17]
; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
@ -223,10 +223,10 @@ define void @callee_no_stack_with_call() #0 {
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
@ -1595,12 +1595,12 @@ define void @ipra_call_with_stack() #0 {
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v1, s30, 0
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: v_writelane_b32 v1, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_getpc_b64 s[16:17]
; MUBUF-NEXT: s_add_u32 s16, s16, local_empty_func@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s17, s17, local_empty_func@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v1, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
; MUBUF-NEXT: v_readlane_b32 s31, v1, 1
; MUBUF-NEXT: v_readlane_b32 s30, v1, 0
@ -1623,12 +1623,12 @@ define void @ipra_call_with_stack() #0 {
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, local_empty_func@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, local_empty_func@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0

View File

@ -198,11 +198,12 @@ define hidden void @use_workgroup_id_yz() #1 {
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN-NOT: s6
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12
; GCN-NOT: s6
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm

View File

@ -69,20 +69,20 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s12, s12, s17
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s13, s15
; GFX803-NEXT: s_mov_b32 s12, s14
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b32 s14, s16
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b32 s14, s16
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX803-NEXT: s_endpgm
;
@ -91,17 +91,17 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX900-NEXT: s_endpgm
;
@ -119,10 +119,10 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX1010-NEXT: s_mov_b32 s13, s15
; GFX1010-NEXT: s_mov_b32 s12, s14
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: s_getpc_b64 s[18:19]
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1010-NEXT: s_endpgm
;
@ -132,14 +132,14 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX1100-NEXT: s_mov_b32 s12, s13
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
; GFX1100-NEXT: s_mov_b32 s14, s15
; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1100-NEXT: s_endpgm
@ -153,23 +153,23 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s12, s12, s17
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s13, s15
; GFX803-NEXT: s_mov_b32 s12, s14
; GFX803-NEXT: v_mov_b32_e32 v3, 0
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b32 s14, s16
; GFX803-NEXT: s_movk_i32 s32, 0x400
; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], 0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX803-NEXT: s_endpgm
;
@ -178,20 +178,20 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_movk_i32 s32, 0x400
; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], 0
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX900-NEXT: s_endpgm
;
@ -210,12 +210,12 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX1010-NEXT: s_mov_b32 s13, s15
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX1010-NEXT: s_mov_b32 s12, s14
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], 0
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_getpc_b64 s[18:19]
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], 0
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1010-NEXT: s_endpgm
;
@ -226,6 +226,9 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX1100-NEXT: s_mov_b32 s12, s13
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
@ -233,9 +236,6 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX1100-NEXT: s_mov_b32 s32, 16
; GFX1100-NEXT: scratch_store_b32 off, v1, off dlc
; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1100-NEXT: s_endpgm
@ -320,21 +320,21 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s12, s12, s17
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s13, s15
; GFX803-NEXT: s_mov_b32 s12, s14
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b32 s14, s16
; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX803-NEXT: s_endpgm
;
@ -343,18 +343,18 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX900-NEXT: s_endpgm
;
@ -373,10 +373,10 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1010-NEXT: s_mov_b32 s13, s15
; GFX1010-NEXT: s_mov_b32 s12, s14
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: s_getpc_b64 s[18:19]
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1010-NEXT: s_endpgm
;
@ -386,15 +386,15 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1100-NEXT: s_mov_b32 s12, s13
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
; GFX1100-NEXT: s_mov_b32 s14, s15
; GFX1100-NEXT: s_mov_b32 s33, 0
; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1100-NEXT: s_endpgm
; GFX1010-NEXT s_add_u32 s12, s12, s17
@ -426,24 +426,24 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s12, s12, s17
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s13, s15
; GFX803-NEXT: s_mov_b32 s12, s14
; GFX803-NEXT: v_mov_b32_e32 v3, 0
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b32 s14, s16
; GFX803-NEXT: s_movk_i32 s32, 0x400
; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], s33
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_getpc_b64 s[18:19]
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX803-NEXT: s_endpgm
;
@ -452,21 +452,21 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_movk_i32 s32, 0x400
; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_getpc_b64 s[18:19]
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX900-NEXT: s_endpgm
;
@ -486,12 +486,12 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1010-NEXT: s_mov_b32 s13, s15
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX1010-NEXT: s_mov_b32 s12, s14
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], s33
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_getpc_b64 s[18:19]
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
; GFX1010-NEXT: s_mov_b32 s14, s16
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], s33
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1010-NEXT: s_endpgm
;
@ -503,6 +503,9 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1100-NEXT: s_mov_b32 s12, s13
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
@ -510,9 +513,6 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1100-NEXT: s_mov_b32 s32, 16
; GFX1100-NEXT: scratch_store_b32 off, v1, s33 dlc
; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_getpc_b64 s[16:17]
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1100-NEXT: s_endpgm
entry:

View File

@ -35,10 +35,10 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: v_writelane_b32 v40, s16, 2
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
@ -71,10 +71,10 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: v_writelane_b32 v40, s16, 2
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
@ -107,10 +107,10 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: v_writelane_b32 v40, s16, 2
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
@ -143,10 +143,10 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: v_writelane_b32 v40, s16, 2
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
@ -189,16 +189,16 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s14, s16
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_branch .LBB4_3
; GCN-NEXT: .LBB4_2:
@ -240,16 +240,16 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s14, s16
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_branch .LBB5_3
; GCN-NEXT: .LBB5_2:

View File

@ -2917,8 +2917,8 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
@ -5754,8 +5754,8 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo

View File

@ -2917,8 +2917,8 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
@ -5754,8 +5754,8 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo

View File

@ -19,12 +19,12 @@ define void @callee_with_stack_and_call() #0 {
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0
; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1
; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1
; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0
@ -62,11 +62,11 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1

View File

@ -1267,16 +1267,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB2_3
; GFX1032-NEXT: ; %bb.1:
@ -1487,16 +1487,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -2487,16 +2487,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB4_3
; GFX1032-NEXT: ; %bb.1:
@ -2737,16 +2737,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -4543,16 +4543,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-NEXT: ; %bb.1:
@ -4793,16 +4793,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -5987,19 +5987,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s42, s9
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
@ -6446,19 +6446,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -7692,8 +7692,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
@ -8122,16 +8122,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-NEXT: ; %bb.1:
@ -8379,16 +8379,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -9217,8 +9217,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -9555,16 +9555,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB13_3
; GFX1032-NEXT: ; %bb.1:
@ -9812,16 +9812,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -10650,8 +10650,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -11565,8 +11565,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -13748,8 +13748,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0

View File

@ -3366,17 +3366,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
@ -3806,17 +3806,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
@ -6469,8 +6469,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6]
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
@ -6914,17 +6914,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
@ -7354,17 +7354,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -8642,8 +8642,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0

View File

@ -3366,17 +3366,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
@ -3806,17 +3806,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
@ -6469,8 +6469,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6]
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
@ -6914,17 +6914,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
@ -7354,17 +7354,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -8642,8 +8642,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0

View File

@ -1379,16 +1379,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB2_3
; GFX1032-NEXT: ; %bb.1:
@ -1629,16 +1629,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -2711,16 +2711,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB4_3
; GFX1032-NEXT: ; %bb.1:
@ -2961,16 +2961,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -4871,16 +4871,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-NEXT: ; %bb.1:
@ -5121,16 +5121,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -6315,19 +6315,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s42, s9
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s50, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-NEXT: s_mov_b32 s46, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
@ -6774,19 +6774,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -8020,8 +8020,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
@ -8450,16 +8450,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-NEXT: ; %bb.1:
@ -8707,16 +8707,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -9544,8 +9544,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -9882,16 +9882,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-NEXT: s_mov_b32 s14, -1
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-NEXT: s_add_u32 s12, s12, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB13_3
; GFX1032-NEXT: ; %bb.1:
@ -10139,16 +10139,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3
; GFX1032-DPP-NEXT: ; %bb.1:
@ -10977,8 +10977,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -11892,8 +11892,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
@ -14074,8 +14074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0

View File

@ -54,6 +54,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s33, s16
; CHECK-NEXT: s_addc_u32 s45, s35, 0
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: s_mov_b32 s13, s15
@ -62,14 +65,14 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v45, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v43, v0
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
@ -77,13 +80,13 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v41, v0
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
@ -92,26 +95,23 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: ds_write_b32 v45, v45 offset:15360
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v43
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v43
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: global_load_dword v0, v0, s[52:53]
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: global_load_dword v0, v0, s[52:53]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
@ -190,6 +190,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
@ -197,9 +200,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v58
@ -215,6 +215,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
@ -223,9 +226,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
@ -241,6 +241,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
@ -249,9 +252,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
@ -267,6 +267,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
@ -275,9 +278,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v58
@ -319,6 +319,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
@ -326,9 +329,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v57
@ -356,15 +356,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s4, exec_lo
@ -381,15 +381,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
@ -439,16 +439,16 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_and_b32_e32 v0, 0xf0, v0
; CHECK-NEXT: v_and_b32_e32 v1, 15, v1
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v73
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
@ -500,15 +500,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mov_b32_e32 v2, v44
; CHECK-NEXT: s_add_u32 s8, s34, 40
; CHECK-NEXT: s_addc_u32 s9, s35, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_branch .LBB0_27
; CHECK-NEXT: .LBB0_33:
@ -803,6 +803,9 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s33, s16
; CHECK-NEXT: s_addc_u32 s45, s39, 0
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: s_mov_b32 s13, s15
@ -811,14 +814,14 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[6:7]
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v43, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v42, v0
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
@ -826,13 +829,13 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_lo_u32 v46, v0, 14
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
@ -841,27 +844,24 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v42
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v42
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: global_load_dword v0, v0, s[46:47]
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: global_load_dword v0, v0, s[46:47]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
@ -945,6 +945,9 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_add_u32 s8, s38, 40
; CHECK-NEXT: s_addc_u32 s9, s39, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
@ -952,9 +955,6 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v47
@ -982,15 +982,15 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_add_u32 s8, s38, 40
; CHECK-NEXT: s_addc_u32 s9, s39, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s43
; CHECK-NEXT: s_mov_b32 s13, s42
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
.5:

View File

@ -4,8 +4,8 @@
define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_then:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: ; %bb.1: ; %.bb0
; GCN-NEXT: v_mov_b32_e32 v3, 1
@ -60,8 +60,8 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_else_vgpr_opt:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: ; %bb.1: ; %.bb0
; GCN-NEXT: v_mov_b32_e32 v3, 1

View File

@ -236,10 +236,10 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt
; GCN-NEXT: v_writelane_b32 v40, s4, 2
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_byval_i32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_byval_i32@rel32@hi+12
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
@ -881,6 +881,9 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, 9
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_multi_byval@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_multi_byval@rel32@hi+12
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:148
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:144
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:152
@ -895,9 +898,6 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_multi_byval@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_multi_byval@rel32@hi+12
; GCN-NEXT: s_setpc_b64 s[16:17]
entry:
%alloca0 = alloca [3 x i32], align 16, addrspace(5)
@ -925,6 +925,9 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
@ -956,9 +959,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
; GCN-NEXT: s_setpc_b64 s[16:17]
entry:
%alloca = alloca [3 x i32], align 16, addrspace(5)

View File

@ -1023,8 +1023,8 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3

View File

@ -21,10 +21,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000
; MUBUF-NEXT: v_mov_b32_e32 v3, 0
; MUBUF-NEXT: v_mov_b32_e32 v4, 0x400000
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
@ -85,10 +85,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF11-NEXT: v_mov_b32_e32 v1, 0x2000
; MUBUF11-NEXT: v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
; MUBUF11-NEXT: v_mov_b32_e32 v4, 0x400000
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
; MUBUF11-NEXT: s_getpc_b64 s[0:1]
; MUBUF11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; MUBUF11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
; MUBUF11-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF11-NEXT: v_mov_b32_e32 v0, s2
; MUBUF11-NEXT: s_swappc_b64 s[30:31], s[0:1]
@ -112,10 +112,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; FLATSCR11-NEXT: v_mov_b32_e32 v1, 0x2000
; FLATSCR11-NEXT: v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
; FLATSCR11-NEXT: v_mov_b32_e32 v4, 0x400000
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
; FLATSCR11-NEXT: s_getpc_b64 s[0:1]
; FLATSCR11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; FLATSCR11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
; FLATSCR11-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR11-NEXT: v_mov_b32_e32 v0, s2
; FLATSCR11-NEXT: s_swappc_b64 s[30:31], s[0:1]

View File

@ -25,17 +25,20 @@ define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %a
; CHECK-NEXT: s_getpc_b64 s[18:19]
; CHECK-NEXT: s_add_u32 s18, s18, global@rel32@lo+1948
; CHECK-NEXT: s_addc_u32 s19, s19, global@rel32@hi+1956
; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: v_mov_b32_e32 v0, s18
; CHECK-NEXT: v_mov_b32_e32 v1, s19
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, eggs@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, eggs@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: v_mov_b32_e32 v0, s18
; CHECK-NEXT: v_mov_b32_e32 v1, s19
; CHECK-NEXT: s_setpc_b64 s[16:17]
; CHECK-NEXT: .LBB0_3: ; %LeafBlock1
; CHECK-NEXT: s_cbranch_scc0 .LBB0_5
; CHECK-NEXT: ; %bb.4: ; %bb8
; CHECK-NEXT: v_mov_b32_e32 v0, v1
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12
; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v6
; CHECK-NEXT: v_mov_b32_e32 v3, v7
@ -47,9 +50,6 @@ define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %a
; CHECK-NEXT: v_mov_b32_e32 v9, v13
; CHECK-NEXT: v_mov_b32_e32 v10, v14
; CHECK-NEXT: v_mov_b32_e32 v11, v15
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12
; CHECK-NEXT: s_setpc_b64 s[16:17]
; CHECK-NEXT: .LBB0_5: ; %bb9
; CHECK-NEXT: s_setpc_b64 s[30:31]

View File

@ -7,10 +7,10 @@ define void @tail_call_i32_inreg_uniform(i32 inreg %sgpr) {
; CHECK-LABEL: tail_call_i32_inreg_uniform:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s0, s16
; CHECK-NEXT: s_getpc_b64 s[18:19]
; CHECK-NEXT: s_add_u32 s18, s18, void_func_i32_inreg@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s19, s19, void_func_i32_inreg@rel32@hi+12
; CHECK-NEXT: s_mov_b32 s0, s16
; CHECK-NEXT: s_setpc_b64 s[18:19]
tail call void @void_func_i32_inreg(i32 inreg %sgpr)
ret void

View File

@ -290,6 +290,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
@ -297,9 +300,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow
@ -308,6 +308,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
@ -315,9 +318,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
;
@ -582,6 +582,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
@ -589,9 +592,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow
@ -600,6 +600,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
@ -607,9 +610,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
bb:

View File

@ -161,16 +161,16 @@ for.end:
define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
; SI-LABEL: loop:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: v_mov_b32_e32 v6, v0
; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s14, -1
; SI-NEXT: v_mov_b32_e32 v6, v0
; SI-NEXT: v_mov_b32_e32 v0, v1
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
; SI-NEXT: s_mov_b32 s15, 0x31c16000
; SI-NEXT: s_add_u32 s12, s12, s1
; SI-NEXT: s_addc_u32 s13, s13, 0
; SI-NEXT: s_mov_b32 s32, 0
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
; SI-NEXT: ; implicit-def: $vgpr1
; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT: s_xor_b32 s6, exec_lo, s0
@ -243,11 +243,11 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; SI-NEXT: s_mov_b32 s14, -1
; SI-NEXT: v_mov_b32_e32 v40, v1
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT: s_mov_b32 s15, 0x31c16000
; SI-NEXT: s_add_u32 s12, s12, s1
; SI-NEXT: s_addc_u32 s13, s13, 0
; SI-NEXT: s_mov_b32 s32, 0
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT: s_xor_b32 s6, exec_lo, s0

View File

@ -372,8 +372,8 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
; GFX1032-NEXT: .LBB10_2: ; %bb2
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: v_cmp_ge_i32_e64 s4, v1, v0
; GFX1032-NEXT: s_mov_b32 s3, 0
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v1, v0
; GFX1032-NEXT: s_mov_b32 s3, 0
; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_4
; GFX1032-NEXT: ; %bb.3: ; %bb5
@ -515,8 +515,8 @@ bb13:
define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #0 {
; GFX1032-LABEL: test_loop_with_if_else_break:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_mov_b32 s2, 0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB11_6
; GFX1032-NEXT: ; %bb.1: ; %.preheader

View File

@ -416,10 +416,10 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O3-NEXT: v_mov_b32_e32 v0, s8
; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: v_cndmask_b32_e64 v2, 0, v0, s[34:35]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O3-NEXT: s_getpc_b64 s[36:37]
; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0
; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2

View File

@ -426,12 +426,12 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O3-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-O3-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[24:25]
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O3-NEXT: s_getpc_b64 s[22:23]
; GFX9-O3-NEXT: s_add_u32 s22, s22, called@rel32@lo+4
; GFX9-O3-NEXT: s_addc_u32 s23, s23, called@rel32@hi+12
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O3-NEXT: v_add_u32_e32 v3, v3, v6
@ -1278,12 +1278,12 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
; GFX9-O3-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-O3-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[24:25]
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O3-NEXT: s_getpc_b64 s[22:23]
; GFX9-O3-NEXT: s_add_u32 s22, s22, strict_wwm_called@rel32@lo+4
; GFX9-O3-NEXT: s_addc_u32 s23, s23, strict_wwm_called@rel32@hi+12
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O3-NEXT: v_add_u32_e32 v3, v3, v6

View File

@ -41,8 +41,8 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
; ENABLE-NEXT: bhs .LBB0_6
; ENABLE-NEXT: @ %bb.5: @ %while.body
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLE-NEXT: cmp r0, r2
; ENABLE-NEXT: mov r1, r3
; ENABLE-NEXT: cmp r0, r2
; ENABLE-NEXT: blo .LBB0_4
; ENABLE-NEXT: .LBB0_6: @ %if.end29
; ENABLE-NEXT: pop {r11, pc}
@ -131,8 +131,8 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
; DISABLE-NEXT: bhs .LBB0_6
; DISABLE-NEXT: @ %bb.5: @ %while.body
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
; DISABLE-NEXT: cmp r0, r2
; DISABLE-NEXT: mov r1, r3
; DISABLE-NEXT: cmp r0, r2
; DISABLE-NEXT: blo .LBB0_4
; DISABLE-NEXT: .LBB0_6: @ %if.end29
; DISABLE-NEXT: pop {r11, pc}

View File

@ -2017,8 +2017,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; ARM-DISABLE-NEXT: sub r4, sp, #24
; ARM-DISABLE-NEXT: bfc r4, #0, #4
; ARM-DISABLE-NEXT: mov sp, r4
; ARM-DISABLE-NEXT: tst r2, #1
; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
; ARM-DISABLE-NEXT: tst r2, #1
; ARM-DISABLE-NEXT: vstr d10, [r4, #16]
; ARM-DISABLE-NEXT: beq LBB12_2
; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3
@ -2123,8 +2123,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
; THUMB-DISABLE-NEXT: sub.w r4, sp, #24
; THUMB-DISABLE-NEXT: bfc r4, #0, #4
; THUMB-DISABLE-NEXT: mov sp, r4
; THUMB-DISABLE-NEXT: lsls r1, r2, #31
; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
; THUMB-DISABLE-NEXT: lsls r1, r2, #31
; THUMB-DISABLE-NEXT: vstr d10, [r4, #16]
; THUMB-DISABLE-NEXT: beq LBB12_2
; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3

View File

@ -50,9 +50,9 @@ define void @test_pr22678() {
define <4 x i32> @test_vmovrrd_combine() nounwind {
; CHECK-LABEL: test_vmovrrd_combine:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ implicit-def: $q8
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: @ implicit-def: $q8
; CHECK-NEXT: bne .LBB3_2
; CHECK-NEXT: @ %bb.1: @ %bb1.preheader
; CHECK-NEXT: vmov.i32 q8, #0x0

View File

@ -54,13 +54,13 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-NEXT: # implicit-def: $r4
; CHECK-NEXT: .LBB0_8: # %bb20
; CHECK-NEXT: mfcr r12
; CHECK-NEXT: cmpwi cr2, r3, -1
; CHECK-NEXT: cmpwi cr3, r4, -1
; CHECK-NEXT: cmpwi cr2, r3, -1
; CHECK-NEXT: stw r12, 8(r1)
; CHECK-NEXT: cmpwi cr7, r3, 0
; CHECK-NEXT: cmpwi cr6, r4, 0
; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
; CHECK-NEXT: crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un
; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
; CHECK-NEXT: # implicit-def: $x3
; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
; CHECK-NEXT: # %bb.9: # %bb34
@ -95,15 +95,15 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-NEXT: lwz r7, 0(r3)
; CHECK-NEXT: .LBB0_18: # %bb58
; CHECK-NEXT: lwz r6, 92(r6)
; CHECK-NEXT: cmpwi cr4, r7, 1
; CHECK-NEXT: crand 4*cr7+un, 4*cr3+gt, 4*cr6+un
; CHECK-NEXT: cmpwi cr3, r5, 1
; CHECK-NEXT: cmpwi cr4, r7, 1
; CHECK-NEXT: crand 4*cr7+gt, 4*cr7+eq, 4*cr1+lt
; CHECK-NEXT: # implicit-def: $x5
; CHECK-NEXT: crand 4*cr6+un, 4*cr2+eq, 4*cr6+un
; CHECK-NEXT: crand 4*cr5+un, 4*cr6+eq, 4*cr5+un
; CHECK-NEXT: crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt
; CHECK-NEXT: crand 4*cr7+lt, 4*cr4+lt, 4*cr7+lt
; CHECK-NEXT: crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt
; CHECK-NEXT: cmpwi r6, 1
; CHECK-NEXT: crand 4*cr6+lt, lt, 4*cr6+lt
; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_20

View File

@ -24,8 +24,8 @@ define arm_aapcs_vfpcc void @fast_float_mul(ptr nocapture %a, ptr nocapture read
; CHECK-NEXT: cmpeq.w r12, #0
; CHECK-NEXT: beq .LBB0_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: subs r4, r3, #1
; CHECK-NEXT: and r12, r3, #3
; CHECK-NEXT: subs r4, r3, #1
; CHECK-NEXT: cmp r4, #3
; CHECK-NEXT: bhs .LBB0_6
; CHECK-NEXT: @ %bb.3:

View File

@ -10,8 +10,8 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
; CHECK-NEXT: subs.w r9, r1, #1
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: subs r7, r1, #2
; CHECK-NEXT: and r8, r9, #3
; CHECK-NEXT: subs r7, r1, #2
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB0_4
; CHECK-NEXT: @ %bb.2:

View File

@ -1411,8 +1411,8 @@ define arm_aapcs_vfpcc float @half_half_mac(ptr nocapture readonly %a, ptr nocap
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: cbz r2, .LBB9_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r12, r2, #3
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB9_4
; CHECK-NEXT: @ %bb.2:
@ -1566,8 +1566,8 @@ define arm_aapcs_vfpcc float @half_half_acc(ptr nocapture readonly %a, ptr nocap
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: cbz r2, .LBB10_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r12, r2, #3
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB10_4
; CHECK-NEXT: @ %bb.2:
@ -1721,8 +1721,8 @@ define arm_aapcs_vfpcc float @half_short_mac(ptr nocapture readonly %a, ptr noca
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: cbz r2, .LBB11_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r12, r2, #3
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB11_4
; CHECK-NEXT: @ %bb.2:

View File

@ -348,8 +348,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly
; CHECK-NEXT: cmpeq r7, #0
; CHECK-NEXT: beq .LBB5_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: and r12, r4, #3
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB5_6
; CHECK-NEXT: @ %bb.3:
@ -624,8 +624,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonl
; CHECK-NEXT: cmpeq r7, #0
; CHECK-NEXT: beq .LBB7_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: and r12, r4, #3
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB7_6
; CHECK-NEXT: @ %bb.3:
@ -900,8 +900,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly
; CHECK-NEXT: cmpeq r7, #0
; CHECK-NEXT: beq .LBB9_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: and r12, r4, #3
; CHECK-NEXT: subs r7, r4, #1
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB9_6
; CHECK-NEXT: @ %bb.3:

View File

@ -446,8 +446,8 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(ptr nocapture read
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w r3, r3, r7, lsr #2
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: add.w r3, r3, r7, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB6_5: @ %vector.body46
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View File

@ -1060,12 +1060,12 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
; CHECK-NEXT: vfma.f32 q0, q4, r5
; CHECK-NEXT: vldrw.u32 q3, [r4, #-8]
; CHECK-NEXT: vfma.f32 q0, q5, r6
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: vldrw.u32 q1, [r4, #-4]
; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vfma.f32 q0, q3, r11
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: blo .LBB16_9
; CHECK-NEXT: @ %bb.7: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1
@ -1603,8 +1603,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
; CHECK-NEXT: .LBB19_3: @ %do.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB19_5 Depth 2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: ldrd r5, r11, [r9]
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: ldrd r8, r10, [r9, #8]
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill

View File

@ -1376,8 +1376,8 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: bne .LBB16_3
; CHECK-NEXT: @ %bb.4: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB16_2 Depth=1
; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
; CHECK-NEXT: ldr.w r9, [sp, #52] @ 4-byte Reload
; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
; CHECK-NEXT: cmp r9, r1
; CHECK-NEXT: bne .LBB16_2
; CHECK-NEXT: .LBB16_5: @ %for.cond.cleanup

View File

@ -712,8 +712,8 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: @ %bb.12: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2
; CHECK-NEXT: vaddv.u32 r10, q4
; CHECK-NEXT: cmp r2, r12
; CHECK-NEXT: mov r4, r2
; CHECK-NEXT: cmp r2, r12
; CHECK-NEXT: beq .LBB10_7
; CHECK-NEXT: .LBB10_13: @ %for.body8.us.us.preheader
; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2

View File

@ -180,9 +180,9 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef
; CHECK-NEXT: @ in Loop: Header=BB4_4 Depth=1
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
; CHECK-NEXT: add.w r2, r9, r10
; CHECK-NEXT: sub.w r5, r8, r9
; CHECK-NEXT: add.w r7, r1, r9, lsl #1
; CHECK-NEXT: add.w r2, r1, r2, lsl #1
; CHECK-NEXT: sub.w r5, r8, r9
; CHECK-NEXT: dlstp.32 lr, r5
; CHECK-NEXT: .LBB4_11: @ %vec.epilog.vector.body
; CHECK-NEXT: @ Parent Loop BB4_4 Depth=1

View File

@ -258,11 +258,11 @@ define void @DCT_mve3(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: @ Child Loop BB2_3 Depth 2
; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
; CHECK-NEXT: adds r0, r5, #2
; CHECK-NEXT: adds r2, r5, #1
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r4, r10
; CHECK-NEXT: vmov q2, q0
@ -618,13 +618,13 @@ define void @DCT_mve5(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: adds r1, r0, #4
; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: add.w r10, r0, #2
; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: add.w r10, r0, #2
; CHECK-NEXT: add.w r11, r0, #1
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q3, q1
; CHECK-NEXT: vmov q2, q1
@ -833,8 +833,8 @@ define void @DCT_mve6(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: add.w r11, r0, #2
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: adds r4, r0, #1
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: vmov q3, q1
; CHECK-NEXT: vmov q4, q1
@ -1068,8 +1068,8 @@ define void @DCT_mve7(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: vmov.i32 q2, #0x0
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: adds r4, r0, #2
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: add.w r8, r0, #1
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r3, r9
; CHECK-NEXT: vmov q4, q2
; CHECK-NEXT: vmov q5, q2
@ -1347,11 +1347,11 @@ define void @DCT_mve8(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
; CHECK-NEXT: adds r1, r0, #4
; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmov.i32 q3, #0x0
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: adds r4, r0, #3
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: adds r4, r0, #3
; CHECK-NEXT: add.w r8, r0, #2
; CHECK-NEXT: adds r1, r0, #1
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: vmov q5, q3
; CHECK-NEXT: vmov q6, q3

View File

@ -100,8 +100,8 @@ define void @arm_cmplx_dot_prod_q15(ptr nocapture readonly %pSrcA, ptr nocapture
; CHECK-NEXT: ldr.w r8, [sp, #36]
; CHECK-NEXT: mov r6, r12
; CHECK-NEXT: mov r5, r7
; CHECK-NEXT: and r2, r2, #3
; CHECK-NEXT: lsrl r6, r5, #6
; CHECK-NEXT: and r2, r2, #3
; CHECK-NEXT: wls lr, r2, .LBB1_7
; CHECK-NEXT: .LBB1_5: @ %while.body11
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View File

@ -708,14 +708,14 @@ define ptr @signext(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16
; CHECK-NEXT: mov r6, r12
; CHECK-NEXT: .LBB5_4: @ %for.cond.cleanup23
; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1
; CHECK-NEXT: add.w r0, r8, r10
; CHECK-NEXT: ldr r1, [sp, #100]
; CHECK-NEXT: add.w r0, r8, r10
; CHECK-NEXT: add r0, r6
; CHECK-NEXT: add r0, r12
; CHECK-NEXT: strb.w r0, [r1, r11]
; CHECK-NEXT: add.w r11, r11, #1
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r11, r0
; CHECK-NEXT: beq .LBB5_8
; CHECK-NEXT: .LBB5_5: @ %for.body
@ -933,14 +933,14 @@ define ptr @signext_optsize(ptr %input_row, ptr %input_col, i16 zeroext %output_
; CHECK-NEXT: mov r6, r12
; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup23
; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1
; CHECK-NEXT: add.w r0, r8, r10
; CHECK-NEXT: ldr r1, [sp, #100]
; CHECK-NEXT: add.w r0, r8, r10
; CHECK-NEXT: add r0, r6
; CHECK-NEXT: add r0, r12
; CHECK-NEXT: strb.w r0, [r1, r11]
; CHECK-NEXT: add.w r11, r11, #1
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r11, r0
; CHECK-NEXT: bne .LBB6_3
; CHECK-NEXT: .LBB6_8: @ %if.end

View File

@ -11,8 +11,8 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResul
; CHECK-NEXT: vidup.u32 q2, r4, #1
; CHECK-NEXT: movw r5, #54437
; CHECK-NEXT: movt r5, #21352
; CHECK-NEXT: vdup.32 q1, r5
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vdup.32 q1, r5
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View File

@ -11,10 +11,10 @@ define arm_aapcs_vfpcc void @start12(ptr nocapture readonly %x, ptr nocapture re
; CHECK-NEXT: poplt {r4, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: subs r3, #12
; CHECK-NEXT: adds r0, #48
; CHECK-NEXT: adds r1, #48
; CHECK-NEXT: adds r2, #48
; CHECK-NEXT: subs r3, #12
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

View File

@ -42,9 +42,9 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add r0, sp, #28
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: cmp r4, #1
; CHECK-NEXT: stm r0!, {r1, r2, r3}
; CHECK-NEXT: add r0, sp, #28
; CHECK-NEXT: cmp r4, #1
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: blt .LBB0_2
; CHECK-NEXT: .LBB0_1: @ %for.body

View File

@ -46,8 +46,8 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: blt .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r0, r5, #1
; CHECK-NEXT: and r12, r5, #3
; CHECK-NEXT: subs r0, r5, #1
; CHECK-NEXT: cmp r0, #3
; CHECK-NEXT: bhs .LBB0_4
; CHECK-NEXT: @ %bb.2:

View File

@ -58,8 +58,8 @@ define void @double_foobar() {
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
; CHECK-NEXT: movs r0, #2
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: ldr r0, [r1, #8]
; CHECK-NEXT: mov sp, r0
; CHECK-NEXT: ldr r0, [r1, #4]

View File

@ -23,9 +23,9 @@ define i32 @simple(ptr %a, ptr %b, i32 %x) nounwind {
; A9-NEXT: add.w r4, lr, r2
; A9-NEXT: ldr.w r6, [lr, r2]
; A9-NEXT: add r0, r3
; A9-NEXT: adds r3, r4, r2
; A9-NEXT: add r0, r12
; A9-NEXT: ldr r5, [r4, r2]
; A9-NEXT: add r0, r12
; A9-NEXT: adds r3, r4, r2
; A9-NEXT: add r0, r6
; A9-NEXT: add r3, r2
; A9-NEXT: add r0, r5