[MachineScheduler] Fix physreg dependencies of ExitSU (#123541)
Providing the correct operand index allows addPhysRegDataDeps to compute the correct latency.

Pull Request: https://github.com/llvm/llvm-project/pull/123541
This commit is contained in:
parent
15336823ad
commit
ff9c041d96
@ -209,13 +209,25 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
|
||||
ExitSU.setInstr(ExitMI);
|
||||
// Add dependencies on the defs and uses of the instruction.
|
||||
if (ExitMI) {
|
||||
const MCInstrDesc &MIDesc = ExitMI->getDesc();
|
||||
for (const MachineOperand &MO : ExitMI->all_uses()) {
|
||||
unsigned OpIdx = MO.getOperandNo();
|
||||
Register Reg = MO.getReg();
|
||||
if (Reg.isPhysical()) {
|
||||
// addPhysRegDataDeps uses the provided operand index to retrieve
|
||||
// the operand use cycle from the scheduling model. If the operand
|
||||
// is "fake" (e.g., an operand of a call instruction used to pass
|
||||
// an argument to the called function), the scheduling model may not
|
||||
// have an entry for it. If this is the case, pass -1 as operand index,
|
||||
// which will cause addPhysRegDataDeps to add an artificial dependency.
|
||||
// FIXME: Using hasImplicitUseOfPhysReg here is inaccurate as it misses
|
||||
// aliases. When fixing, make sure to update addPhysRegDataDeps, too.
|
||||
bool IsRealUse = OpIdx < MIDesc.getNumOperands() ||
|
||||
MIDesc.hasImplicitUseOfPhysReg(Reg);
|
||||
for (MCRegUnit Unit : TRI->regunits(Reg))
|
||||
Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
|
||||
Uses.insert(PhysRegSUOper(&ExitSU, IsRealUse ? OpIdx : -1, Unit));
|
||||
} else if (Reg.isVirtual() && MO.readsReg()) {
|
||||
addVRegUseDeps(&ExitSU, MO.getOperandNo());
|
||||
addVRegUseDeps(&ExitSU, OpIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,12 +15,12 @@ define ptr addrspace(1) @call_assert_align() {
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x400
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: global_store_dword v[0:1], v2, off
|
||||
@ -45,11 +45,11 @@ define ptr addrspace(1) @tail_call_assert_align() {
|
||||
; CHECK-LABEL: tail_call_assert_align:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
entry:
|
||||
%call = tail call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null)
|
||||
|
@ -44,8 +44,8 @@ define amdgpu_kernel void @kernel_caller_stack() {
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: s_add_u32 s2, s32, 16
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
|
||||
@ -239,11 +239,11 @@ define void @func_caller_stack() {
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 12
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
|
||||
; MUBUF-NEXT: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
|
||||
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -274,15 +274,15 @@ define void @func_caller_stack() {
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s32, 16
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -312,10 +312,10 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s4, 2
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(1)
|
||||
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(1)
|
||||
@ -394,8 +394,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
|
||||
; FLATSCR-NEXT: v_add_u32_e32 v3, 8, v0
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s32, 8
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: s_add_u32 s2, s32, 56
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32
|
||||
|
@ -191,9 +191,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
|
||||
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body:
|
||||
; GFX10: ; %bb.0: ; %entry
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-NEXT: s_mov_b32 s5, 0
|
||||
; GFX10-NEXT: s_mov_b32 s6, -1
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
; GFX10-NEXT: s_cbranch_execz .LBB3_6
|
||||
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
|
||||
|
@ -387,8 +387,8 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
|
||||
define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
|
||||
; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
|
||||
; OLD_RBS: ; %bb.0: ; %A
|
||||
; OLD_RBS-NEXT: s_mov_b32 s0, 0
|
||||
; OLD_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; OLD_RBS-NEXT: s_mov_b32 s0, 0
|
||||
; OLD_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; OLD_RBS-NEXT: ; %bb.1: ; %B
|
||||
; OLD_RBS-NEXT: s_mov_b32 s0, 1
|
||||
|
@ -25,10 +25,10 @@ define void @parent_func_missing_inputs() #0 {
|
||||
; FIXEDABI-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; FIXEDABI-NEXT: s_addk_i32 s32, 0x400
|
||||
; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FIXEDABI-NEXT: s_getpc_b64 s[16:17]
|
||||
; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
|
||||
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
|
||||
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -49,21 +49,21 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
|
||||
; FIXEDABI-SDAG: ; %bb.0:
|
||||
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
|
||||
; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s9
|
||||
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
|
||||
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 s14, s8
|
||||
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
|
||||
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
|
||||
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
|
||||
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 s12, s6
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 s13, s7
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
|
||||
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
|
||||
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
|
||||
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
|
||||
; FIXEDABI-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; FIXEDABI-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
@ -71,21 +71,21 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
|
||||
; FIXEDABI-GISEL: ; %bb.0:
|
||||
; FIXEDABI-GISEL-NEXT: s_add_i32 s4, s4, s9
|
||||
; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s9
|
||||
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
|
||||
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 s14, s8
|
||||
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
|
||||
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
|
||||
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 s12, s6
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 s13, s7
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
|
||||
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
|
||||
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
|
||||
; FIXEDABI-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; FIXEDABI-GISEL-NEXT: s_endpgm
|
||||
call void @requires_all_inputs()
|
||||
|
@ -1286,9 +1286,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
|
||||
@ -1412,9 +1412,10 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
|
||||
@ -1540,9 +1541,10 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB2_2
|
||||
@ -3129,8 +3131,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s8, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s7, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s6
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB5_2
|
||||
@ -4839,9 +4841,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB8_2
|
||||
@ -4965,9 +4967,10 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB8_2
|
||||
@ -5093,9 +5096,10 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
|
||||
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
|
||||
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB8_2
|
||||
@ -6715,8 +6719,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s8, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s7, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s6
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s6, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB11_2
|
||||
|
@ -919,9 +919,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
|
||||
@ -1030,9 +1030,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
|
||||
@ -2630,8 +2630,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s6, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s3, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB6_2
|
||||
@ -2812,8 +2812,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB6_2
|
||||
@ -3301,8 +3301,8 @@ define amdgpu_kernel void @add_i64_varying_nouse() {
|
||||
; GFX1032_DPP-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3
|
||||
; GFX1032_DPP-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v1
|
||||
; GFX1032_DPP-NEXT: v_mbcnt_lo_u32_b32 v9, exec_lo, 0
|
||||
; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v1
|
||||
; GFX1032_DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
|
||||
@ -4341,9 +4341,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB10_2
|
||||
@ -4452,9 +4452,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB10_2
|
||||
@ -6075,8 +6075,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v8, s6, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v7, s3, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB14_2
|
||||
@ -6257,8 +6257,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v7, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB14_2
|
||||
@ -6757,9 +6757,9 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB15_2
|
||||
@ -6868,9 +6868,10 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB15_2
|
||||
@ -7464,8 +7465,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB16_2
|
||||
@ -7621,8 +7622,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB16_2
|
||||
@ -8122,9 +8123,9 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB17_2
|
||||
@ -8233,9 +8234,9 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB17_2
|
||||
@ -8828,8 +8829,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB18_2
|
||||
@ -8985,8 +8986,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB18_2
|
||||
@ -9486,9 +9487,9 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB19_2
|
||||
@ -9597,9 +9598,9 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB19_2
|
||||
@ -10192,8 +10193,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB20_2
|
||||
@ -10349,8 +10350,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v6, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB20_2
|
||||
@ -10849,9 +10850,9 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB21_2
|
||||
@ -10960,9 +10961,10 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB21_2
|
||||
@ -11967,8 +11969,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB23_2
|
||||
@ -12181,8 +12183,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB23_2
|
||||
@ -12682,9 +12684,9 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB24_2
|
||||
@ -12793,9 +12795,10 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB24_2
|
||||
@ -13800,8 +13803,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB26_2
|
||||
@ -14014,8 +14017,8 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB26_2
|
||||
@ -14516,9 +14519,9 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB27_2
|
||||
@ -14627,9 +14630,9 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB27_2
|
||||
@ -15625,8 +15628,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB29_2
|
||||
@ -15833,8 +15836,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB29_2
|
||||
@ -16334,9 +16337,9 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB30_2
|
||||
@ -16445,9 +16448,10 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s0, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB30_2
|
||||
@ -17442,8 +17446,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1032_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB32_2
|
||||
@ -17650,8 +17654,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v5, s3, 16
|
||||
; GFX1132_DPP-NEXT: v_writelane_b32 v4, s6, 16
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
; GFX1132_DPP-NEXT: s_mov_b32 s2, -1
|
||||
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
|
||||
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
||||
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB32_2
|
||||
|
@ -43,11 +43,11 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i8_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i8_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -103,11 +103,11 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -163,11 +163,11 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -223,12 +223,12 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -284,12 +284,12 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -345,13 +345,13 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s19, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -407,14 +407,14 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s20, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s19
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -470,6 +470,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s24, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[24:25]
|
||||
; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s19
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
@ -479,9 +482,6 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
|
||||
; GFX9-NEXT: s_mov_b32 s18, s22
|
||||
; GFX9-NEXT: s_mov_b32 s19, s23
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[24:25]
|
||||
; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -537,11 +537,11 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -597,11 +597,11 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -657,11 +657,11 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -717,12 +717,12 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -778,11 +778,11 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2f16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2f16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -839,11 +839,11 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -899,12 +899,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -960,12 +960,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1021,12 +1021,12 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1082,12 +1082,12 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1143,11 +1143,11 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p3_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p3_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1203,14 +1203,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s20, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s19
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[20:21]
|
||||
; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1266,12 +1266,12 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s18, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1327,15 +1327,15 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s21, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s19
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
; GFX9-NEXT: s_mov_b32 s0, s16
|
||||
; GFX9-NEXT: s_mov_b32 s16, s20
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1391,6 +1391,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 vcc
|
||||
; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s19
|
||||
; GFX9-NEXT: s_mov_b32 s2, s18
|
||||
; GFX9-NEXT: s_mov_b32 s1, s17
|
||||
@ -1405,9 +1408,6 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
|
||||
; GFX9-NEXT: s_mov_b32 s23, s27
|
||||
; GFX9-NEXT: s_mov_b32 s24, s28
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 vcc
|
||||
; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], vcc
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1465,6 +1465,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s21, 2
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s3, s7
|
||||
; GFX9-NEXT: s_mov_b32 s2, s6
|
||||
; GFX9-NEXT: s_mov_b32 s1, s5
|
||||
@ -1480,9 +1483,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
|
||||
; GFX9-NEXT: s_mov_b32 s15, s19
|
||||
; GFX9-NEXT: s_mov_b32 s16, s20
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX9-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
|
||||
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -102,10 +102,10 @@ define hidden void @void_func_void_clobber_vcc() #2 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
|
||||
; GCN: s_mov_b64 s[34:35], vcc
|
||||
; GCN-NEXT: s_getpc_b64
|
||||
; GCN: s_getpc_b64
|
||||
; GCN-NEXT: s_add_u32
|
||||
; GCN-NEXT: s_addc_u32
|
||||
; GCN: s_mov_b64 s[34:35], vcc
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN: s_mov_b64 vcc, s[34:35]
|
||||
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
|
||||
@ -142,21 +142,27 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
|
||||
; FIXME: What is the expected behavior for reserved registers here?
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
|
||||
; GCN: #ASMSTART
|
||||
; GCN-NEXT: ; def s33
|
||||
; GCN-NEXT: #ASMEND
|
||||
; FLATSCR: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; MUBUF: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
|
||||
; GCN: #ASMSTART
|
||||
; GCN-NEXT: ; def s33
|
||||
; GCN-NEXT: #ASMEND
|
||||
|
||||
; GCN-NOT: s33
|
||||
|
||||
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
|
||||
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
|
||||
|
||||
; GCN-NOT: s33
|
||||
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; use s33
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NOT: s33
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
|
||||
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
|
||||
@ -168,20 +174,20 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
|
||||
; GCN-NOT: s34
|
||||
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
|
||||
; GCN-NOT: s34
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; def s34
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; FLATSCR: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
|
||||
; MUBUF: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; def s34
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
|
||||
; GCN-NOT: s34
|
||||
|
||||
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
|
||||
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
|
||||
|
||||
@ -200,19 +206,19 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}}
|
||||
|
||||
; GCN-NOT: v32
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN-NOT: v40
|
||||
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; def v40
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; MUBUF: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
; FLATSCR: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN-NEXT: ; def v40
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
|
||||
; GCN-NOT: v40
|
||||
|
||||
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
|
||||
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
|
||||
@ -255,10 +261,10 @@ define hidden void @void_func_void_clobber_s34() #2 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_getpc_b64
|
||||
; GCN-NEXT: s_add_u32
|
||||
; GCN-NEXT: s_addc_u32
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
|
||||
@ -267,10 +273,10 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_getpc_b64
|
||||
; GCN-NEXT: s_add_u32
|
||||
; GCN-NEXT: s_addc_u32
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s -check-prefix=GISEL
|
||||
|
||||
; Check for optimizing the passed implicit workitem ID based on the
|
||||
; required group size. This should avoid a few bit packing operations.
|
||||
@ -13,15 +13,30 @@ define amdgpu_kernel void @known_x_0(ptr addrspace(1) %out) !reqd_work_group_siz
|
||||
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 20, v2
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 20, v2
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_x_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v0, 20, v2
|
||||
; GISEL-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
@ -34,13 +49,27 @@ define amdgpu_kernel void @known_y_0(ptr addrspace(1) %out) !reqd_work_group_siz
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v2, 20, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v2, 20, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_y_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: v_lshl_or_b32 v31, v2, 20, v0
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
@ -53,13 +82,27 @@ define amdgpu_kernel void @known_z_0(ptr addrspace(1) %out) !reqd_work_group_siz
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_z_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: v_lshl_or_b32 v31, v1, 10, v0
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
@ -72,13 +115,27 @@ define amdgpu_kernel void @known_yz_0(ptr addrspace(1) %out) !reqd_work_group_si
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_yz_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
@ -91,13 +148,27 @@ define amdgpu_kernel void @known_xz_0(ptr addrspace(1) %out) !reqd_work_group_si
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v31, 10, v1
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v31, 10, v1
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_xz_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v31, 10, v1
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
@ -111,13 +182,27 @@ define amdgpu_kernel void @known_xyz_0(ptr addrspace(1) %out) !reqd_work_group_s
|
||||
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s17
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, 0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, 0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: known_xyz_0:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GISEL-NEXT: s_add_u32 s0, s0, s17
|
||||
; GISEL-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GISEL-NEXT: s_getpc_b64 s[4:5]
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, callee@rel32@lo+4
|
||||
; GISEL-NEXT: s_addc_u32 s5, s5, callee@rel32@hi+12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v31, 0
|
||||
; GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
|
@ -13,11 +13,11 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GCN-NEXT: ds_read_b32 v0, v0
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[8:9]
|
||||
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GCN-NEXT: s_endpgm
|
||||
%vgpr = load volatile i32, ptr addrspace(3) %ptr
|
||||
@ -33,16 +33,16 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
|
||||
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
|
||||
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s11
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[8:9]
|
||||
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[8:9]
|
||||
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GCN-NEXT: s_endpgm
|
||||
store i32 0, ptr addrspace(1) %ptr
|
||||
@ -55,16 +55,16 @@ define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #
|
||||
; GCN-LABEL: call_no_wait_after_call:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
|
||||
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
|
||||
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
|
||||
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s11
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[8:9]
|
||||
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v40, 0
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GCN-NEXT: global_store_dword v40, v40, s[34:35]
|
||||
@ -78,16 +78,16 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
|
||||
; GCN-LABEL: call_no_wait_after_call_return_val:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
|
||||
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
|
||||
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
|
||||
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s11
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[8:9]
|
||||
; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v40, 0
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; GCN-NEXT: global_store_dword v40, v0, s[34:35]
|
||||
|
@ -125,12 +125,12 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_getpc_b64 s[16:17]
|
||||
; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -155,12 +155,12 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -196,10 +196,10 @@ define void @callee_no_stack_with_call() #0 {
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: s_getpc_b64 s[16:17]
|
||||
; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12
|
||||
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -223,10 +223,10 @@ define void @callee_no_stack_with_call() #0 {
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
|
||||
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -1595,12 +1595,12 @@ define void @ipra_call_with_stack() #0 {
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: v_writelane_b32 v1, s30, 0
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: v_writelane_b32 v1, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_getpc_b64 s[16:17]
|
||||
; MUBUF-NEXT: s_add_u32 s16, s16, local_empty_func@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s17, s17, local_empty_func@rel32@hi+12
|
||||
; MUBUF-NEXT: v_writelane_b32 v1, s31, 1
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; MUBUF-NEXT: v_readlane_b32 s31, v1, 1
|
||||
; MUBUF-NEXT: v_readlane_b32 s30, v1, 0
|
||||
@ -1623,12 +1623,12 @@ define void @ipra_call_with_stack() #0 {
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
|
||||
; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s0, local_empty_func@rel32@lo+4
|
||||
; FLATSCR-NEXT: s_addc_u32 s1, s1, local_empty_func@rel32@hi+12
|
||||
; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1
|
||||
; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0
|
||||
|
@ -198,11 +198,12 @@ define hidden void @use_workgroup_id_yz() #1 {
|
||||
|
||||
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
|
||||
; GCN-NOT: s6
|
||||
; GCN: s_mov_b32 s12, s6
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12
|
||||
; GCN-NOT: s6
|
||||
; GCN: s_mov_b32 s12, s6
|
||||
; GCN: s_mov_b32 s32, 0
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
||||
|
@ -69,20 +69,20 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_i32 s12, s12, s17
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: s_mov_b32 s13, s15
|
||||
; GFX803-NEXT: s_mov_b32 s12, s14
|
||||
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX803-NEXT: s_mov_b32 s14, s16
|
||||
; GFX803-NEXT: s_mov_b32 s32, 0
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX803-NEXT: s_mov_b32 s14, s16
|
||||
; GFX803-NEXT: s_mov_b32 s32, 0
|
||||
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX803-NEXT: s_endpgm
|
||||
;
|
||||
@ -91,17 +91,17 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
|
||||
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX900-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: s_mov_b32 s13, s15
|
||||
; GFX900-NEXT: s_mov_b32 s12, s14
|
||||
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX900-NEXT: s_mov_b32 s14, s16
|
||||
; GFX900-NEXT: s_mov_b32 s32, 0
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX900-NEXT: s_mov_b32 s14, s16
|
||||
; GFX900-NEXT: s_mov_b32 s32, 0
|
||||
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
@ -119,10 +119,10 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
|
||||
; GFX1010-NEXT: s_mov_b32 s13, s15
|
||||
; GFX1010-NEXT: s_mov_b32 s12, s14
|
||||
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1010-NEXT: s_endpgm
|
||||
;
|
||||
@ -132,14 +132,14 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
|
||||
; GFX1100-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1100-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1100-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1100-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1100-NEXT: s_endpgm
|
||||
|
||||
@ -153,23 +153,23 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_i32 s12, s12, s17
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: s_mov_b32 s13, s15
|
||||
; GFX803-NEXT: s_mov_b32 s12, s14
|
||||
; GFX803-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX803-NEXT: s_mov_b32 s14, s16
|
||||
; GFX803-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], 0
|
||||
; GFX803-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX803-NEXT: s_endpgm
|
||||
;
|
||||
@ -178,20 +178,20 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX900-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: s_mov_b32 s13, s15
|
||||
; GFX900-NEXT: s_mov_b32 s12, s14
|
||||
; GFX900-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX900-NEXT: s_mov_b32 s14, s16
|
||||
; GFX900-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], 0
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
@ -210,12 +210,12 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX1010-NEXT: s_mov_b32 s13, s15
|
||||
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX1010-NEXT: s_mov_b32 s12, s14
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], 0
|
||||
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], 0
|
||||
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1010-NEXT: s_endpgm
|
||||
;
|
||||
@ -226,6 +226,9 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX1100-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1100-NEXT: s_mov_b32 s13, s14
|
||||
@ -233,9 +236,6 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX1100-NEXT: s_mov_b32 s32, 16
|
||||
; GFX1100-NEXT: scratch_store_b32 off, v1, off dlc
|
||||
; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1100-NEXT: s_endpgm
|
||||
|
||||
@ -320,21 +320,21 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_i32 s12, s12, s17
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: s_mov_b32 s13, s15
|
||||
; GFX803-NEXT: s_mov_b32 s12, s14
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX803-NEXT: s_mov_b32 s14, s16
|
||||
; GFX803-NEXT: s_mov_b32 s33, 0
|
||||
; GFX803-NEXT: s_mov_b32 s32, 0
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX803-NEXT: s_endpgm
|
||||
;
|
||||
@ -343,18 +343,18 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
|
||||
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX900-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: s_mov_b32 s13, s15
|
||||
; GFX900-NEXT: s_mov_b32 s12, s14
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX900-NEXT: s_mov_b32 s14, s16
|
||||
; GFX900-NEXT: s_mov_b32 s33, 0
|
||||
; GFX900-NEXT: s_mov_b32 s32, 0
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
@ -373,10 +373,10 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
|
||||
; GFX1010-NEXT: s_mov_b32 s13, s15
|
||||
; GFX1010-NEXT: s_mov_b32 s12, s14
|
||||
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1010-NEXT: s_endpgm
|
||||
;
|
||||
@ -386,15 +386,15 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
|
||||
; GFX1100-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1100-NEXT: s_mov_b32 s13, s14
|
||||
; GFX1100-NEXT: s_mov_b32 s14, s15
|
||||
; GFX1100-NEXT: s_mov_b32 s33, 0
|
||||
; GFX1100-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1100-NEXT: s_endpgm
|
||||
; GFX1010-NEXT s_add_u32 s12, s12, s17
|
||||
@ -426,24 +426,24 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_i32 s12, s12, s17
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX803-NEXT: s_mov_b32 s33, 0
|
||||
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX803-NEXT: s_mov_b32 s13, s15
|
||||
; GFX803-NEXT: s_mov_b32 s12, s14
|
||||
; GFX803-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
|
||||
; GFX803-NEXT: s_mov_b32 s14, s16
|
||||
; GFX803-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], s33
|
||||
; GFX803-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX803-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX803-NEXT: s_endpgm
|
||||
;
|
||||
@ -452,21 +452,21 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
|
||||
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX900-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX900-NEXT: s_mov_b32 s33, 0
|
||||
; GFX900-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX900-NEXT: s_mov_b32 s13, s15
|
||||
; GFX900-NEXT: s_mov_b32 s12, s14
|
||||
; GFX900-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX900-NEXT: s_mov_b32 s14, s16
|
||||
; GFX900-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX900-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX900-NEXT: s_endpgm
|
||||
;
|
||||
@ -486,12 +486,12 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
|
||||
; GFX1010-NEXT: s_mov_b32 s13, s15
|
||||
; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX1010-NEXT: s_mov_b32 s12, s14
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], s33
|
||||
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4
|
||||
; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12
|
||||
; GFX1010-NEXT: s_mov_b32 s14, s16
|
||||
; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], s33
|
||||
; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX1010-NEXT: s_endpgm
|
||||
;
|
||||
@ -503,6 +503,9 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
|
||||
; GFX1100-NEXT: s_mov_b32 s12, s13
|
||||
; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1100-NEXT: s_mov_b32 s13, s14
|
||||
@ -510,9 +513,6 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
|
||||
; GFX1100-NEXT: s_mov_b32 s32, 16
|
||||
; GFX1100-NEXT: scratch_store_b32 off, v1, s33 dlc
|
||||
; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1100-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
|
||||
; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
|
||||
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX1100-NEXT: s_endpgm
|
||||
entry:
|
||||
|
@ -35,10 +35,10 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GCN-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GCN-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -71,10 +71,10 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GCN-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GCN-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -107,10 +107,10 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GCN-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GCN-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -143,10 +143,10 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s16, 2
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GCN-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GCN-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -189,16 +189,16 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_add_u32 s8, s8, 8
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
|
||||
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GCN-NEXT: s_mov_b32 s12, s14
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_mov_b32 s14, s16
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GCN-NEXT: s_branch .LBB4_3
|
||||
; GCN-NEXT: .LBB4_2:
|
||||
@ -240,16 +240,16 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if.else
|
||||
; GCN-NEXT: s_add_u32 s8, s8, 8
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GCN-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
|
||||
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GCN-NEXT: s_mov_b32 s12, s14
|
||||
; GCN-NEXT: s_mov_b32 s13, s15
|
||||
; GCN-NEXT: s_mov_b32 s14, s16
|
||||
; GCN-NEXT: s_getpc_b64 s[18:19]
|
||||
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GCN-NEXT: s_branch .LBB5_3
|
||||
; GCN-NEXT: .LBB5_2:
|
||||
|
@ -2917,8 +2917,8 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
@ -5754,8 +5754,8 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
|
@ -2917,8 +2917,8 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
@ -5754,8 +5754,8 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v5
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
||||
|
@ -19,12 +19,12 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400
|
||||
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
|
||||
; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0
|
||||
@ -62,11 +62,11 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
|
||||
|
@ -1267,16 +1267,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -1487,16 +1487,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -2487,16 +2487,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -2737,16 +2737,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -4543,16 +4543,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -4793,16 +4793,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -5987,19 +5987,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s42, s9
|
||||
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -6446,19 +6446,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -7692,8 +7692,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
@ -8122,16 +8122,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB11_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -8379,16 +8379,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -9217,8 +9217,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -9555,16 +9555,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB13_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -9812,16 +9812,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -10650,8 +10650,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -11565,8 +11565,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -13748,8 +13748,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
|
@ -3366,17 +3366,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -3806,17 +3806,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
@ -6469,8 +6469,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6]
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
@ -6914,17 +6914,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -7354,17 +7354,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -8642,8 +8642,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
|
@ -3366,17 +3366,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -3806,17 +3806,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
||||
; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
@ -6469,8 +6469,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6]
|
||||
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7
|
||||
@ -6914,17 +6914,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -7354,17 +7354,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -8642,8 +8642,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
|
||||
; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
|
@ -1379,16 +1379,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -1629,16 +1629,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -2711,16 +2711,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -2961,16 +2961,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -4871,16 +4871,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -5121,16 +5121,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -6315,19 +6315,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s42, s9
|
||||
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -6774,19 +6774,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -8020,8 +8020,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
@ -8450,16 +8450,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB11_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -8707,16 +8707,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -9544,8 +9544,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -9882,16 +9882,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032: ; %bb.0:
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB13_3
|
||||
; GFX1032-NEXT: ; %bb.1:
|
||||
@ -10139,16 +10139,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
|
||||
;
|
||||
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp:
|
||||
; GFX1032-DPP: ; %bb.0:
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s14, -1
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s11
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3
|
||||
; GFX1032-DPP-NEXT: ; %bb.1:
|
||||
@ -10977,8 +10977,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -11892,8 +11892,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v7, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s2, 0
|
||||
@ -14074,8 +14074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
|
||||
; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0
|
||||
; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11]
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
|
||||
; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
|
||||
|
@ -54,6 +54,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s33, s16
|
||||
; CHECK-NEXT: s_addc_u32 s45, s35, 0
|
||||
; CHECK-NEXT: s_mov_b32 s43, s14
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s14
|
||||
; CHECK-NEXT: s_mov_b32 s13, s15
|
||||
@ -62,14 +65,14 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
|
||||
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v45, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v43, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
@ -77,13 +80,13 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v41, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
@ -92,26 +95,23 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ds_write_b32 v45, v45 offset:15360
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v43
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v43
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: global_load_dword v0, v0, s[52:53]
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: global_load_dword v0, v0, s[52:53]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 12
|
||||
@ -190,6 +190,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
@ -197,9 +200,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v58
|
||||
@ -215,6 +215,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
@ -223,9 +226,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v60
|
||||
@ -241,6 +241,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
@ -249,9 +252,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v60
|
||||
@ -267,6 +267,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
@ -275,9 +278,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v58
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v58
|
||||
@ -319,6 +319,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
@ -326,9 +329,6 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v57
|
||||
@ -356,15 +356,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s4, exec_lo
|
||||
@ -381,15 +381,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
|
||||
@ -439,16 +439,16 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xf0, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 15, v1
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12
|
||||
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v73
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
|
||||
@ -500,15 +500,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, v44
|
||||
; CHECK-NEXT: s_add_u32 s8, s34, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s35, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: s_branch .LBB0_27
|
||||
; CHECK-NEXT: .LBB0_33:
|
||||
@ -803,6 +803,9 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s33, s16
|
||||
; CHECK-NEXT: s_addc_u32 s45, s39, 0
|
||||
; CHECK-NEXT: s_mov_b32 s43, s14
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s14
|
||||
; CHECK-NEXT: s_mov_b32 s13, s15
|
||||
@ -811,14 +814,14 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
|
||||
; CHECK-NEXT: s_mov_b64 s[36:37], s[6:7]
|
||||
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v43, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v42, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
@ -826,13 +829,13 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_mul_lo_u32 v46, v0, 14
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
@ -841,27 +844,24 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v42
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v42
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v40
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: global_load_dword v0, v0, s[46:47]
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: global_load_dword v0, v0, s[46:47]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 12
|
||||
@ -945,6 +945,9 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
|
||||
; CHECK-NEXT: s_add_u32 s8, s38, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s39, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -952,9 +955,6 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: ds_write_b32 v0, v47
|
||||
@ -982,15 +982,15 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: s_add_u32 s8, s38, 40
|
||||
; CHECK-NEXT: s_addc_u32 s9, s39, 0
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
; CHECK-NEXT: s_mov_b32 s12, s43
|
||||
; CHECK-NEXT: s_mov_b32 s13, s42
|
||||
; CHECK-NEXT: s_mov_b32 s14, s33
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
.5:
|
||||
|
@ -4,8 +4,8 @@
|
||||
define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) {
|
||||
; GCN-LABEL: if_then:
|
||||
; GCN: ; %bb.0: ; %.entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GCN-NEXT: ; %bb.1: ; %.bb0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 1
|
||||
@ -60,8 +60,8 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
|
||||
define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) {
|
||||
; GCN-LABEL: if_else_vgpr_opt:
|
||||
; GCN: ; %bb.0: ; %.entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GCN-NEXT: ; %bb.1: ; %.bb0
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, 1
|
||||
|
@ -236,10 +236,10 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt
|
||||
; GCN-NEXT: v_writelane_b32 v40, s4, 2
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_byval_i32@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_byval_i32@rel32@hi+12
|
||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
@ -881,6 +881,9 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 9
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_multi_byval@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_multi_byval@rel32@hi+12
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:148
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:144
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:152
|
||||
@ -895,9 +898,6 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
|
||||
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
|
||||
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_multi_byval@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_multi_byval@rel32@hi+12
|
||||
; GCN-NEXT: s_setpc_b64 s[16:17]
|
||||
entry:
|
||||
%alloca0 = alloca [3 x i32], align 16, addrspace(5)
|
||||
@ -925,6 +925,9 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
@ -956,9 +959,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
|
||||
; GCN-NEXT: v_mov_b32_e32 v28, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v29, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v30, 0
|
||||
; GCN-NEXT: s_getpc_b64 s[16:17]
|
||||
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
|
||||
; GCN-NEXT: s_setpc_b64 s[16:17]
|
||||
entry:
|
||||
%alloca = alloca [3 x i32], align 16, addrspace(5)
|
||||
|
@ -1023,8 +1023,8 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
|
||||
;
|
||||
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
|
||||
; GFX10-WAVE32: ; %bb.0: ; %entry
|
||||
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
||||
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
|
||||
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3
|
||||
|
@ -21,10 +21,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v3, 0
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v4, 0x400000
|
||||
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
|
||||
; MUBUF-NEXT: s_getpc_b64 s[4:5]
|
||||
; MUBUF-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4
|
||||
; MUBUF-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12
|
||||
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
|
||||
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
|
||||
; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
@ -85,10 +85,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; MUBUF11-NEXT: v_mov_b32_e32 v1, 0x2000
|
||||
; MUBUF11-NEXT: v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
|
||||
; MUBUF11-NEXT: v_mov_b32_e32 v4, 0x400000
|
||||
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; MUBUF11-NEXT: s_getpc_b64 s[0:1]
|
||||
; MUBUF11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
|
||||
; MUBUF11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
|
||||
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; MUBUF11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; MUBUF11-NEXT: v_mov_b32_e32 v0, s2
|
||||
; MUBUF11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
@ -112,10 +112,10 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
|
||||
; FLATSCR11-NEXT: v_mov_b32_e32 v1, 0x2000
|
||||
; FLATSCR11-NEXT: v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
|
||||
; FLATSCR11-NEXT: v_mov_b32_e32 v4, 0x400000
|
||||
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; FLATSCR11-NEXT: s_getpc_b64 s[0:1]
|
||||
; FLATSCR11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
|
||||
; FLATSCR11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
|
||||
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
|
||||
; FLATSCR11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; FLATSCR11-NEXT: v_mov_b32_e32 v0, s2
|
||||
; FLATSCR11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
|
@ -25,17 +25,20 @@ define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %a
|
||||
; CHECK-NEXT: s_getpc_b64 s[18:19]
|
||||
; CHECK-NEXT: s_add_u32 s18, s18, global@rel32@lo+1948
|
||||
; CHECK-NEXT: s_addc_u32 s19, s19, global@rel32@hi+1956
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s18
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s19
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, eggs@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, eggs@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s18
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s19
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: .LBB0_3: ; %LeafBlock1
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.4: ; %bb8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, v1
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, v6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v7
|
||||
@ -47,9 +50,6 @@ define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %a
|
||||
; CHECK-NEXT: v_mov_b32_e32 v9, v13
|
||||
; CHECK-NEXT: v_mov_b32_e32 v10, v14
|
||||
; CHECK-NEXT: v_mov_b32_e32 v11, v15
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: .LBB0_5: ; %bb9
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -7,10 +7,10 @@ define void @tail_call_i32_inreg_uniform(i32 inreg %sgpr) {
|
||||
; CHECK-LABEL: tail_call_i32_inreg_uniform:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b32 s0, s16
|
||||
; CHECK-NEXT: s_getpc_b64 s[18:19]
|
||||
; CHECK-NEXT: s_add_u32 s18, s18, void_func_i32_inreg@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s19, s19, void_func_i32_inreg@rel32@hi+12
|
||||
; CHECK-NEXT: s_mov_b32 s0, s16
|
||||
; CHECK-NEXT: s_setpc_b64 s[18:19]
|
||||
tail call void @void_func_i32_inreg(i32 inreg %sgpr)
|
||||
ret void
|
||||
|
@ -290,6 +290,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -297,9 +300,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow
|
||||
@ -308,6 +308,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -315,9 +318,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
|
||||
; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
|
||||
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
|
||||
;
|
||||
@ -582,6 +582,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -589,9 +592,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow
|
||||
@ -600,6 +600,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
|
||||
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
||||
@ -607,9 +610,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
|
||||
; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
|
||||
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
||||
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
||||
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
||||
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
||||
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
|
||||
bb:
|
||||
|
@ -161,16 +161,16 @@ for.end:
|
||||
define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
|
||||
; SI-LABEL: loop:
|
||||
; SI: ; %bb.0: ; %main_body
|
||||
; SI-NEXT: v_mov_b32_e32 v6, v0
|
||||
; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; SI-NEXT: s_mov_b32 s14, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v6, v0
|
||||
; SI-NEXT: v_mov_b32_e32 v0, v1
|
||||
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
|
||||
; SI-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; SI-NEXT: s_add_u32 s12, s12, s1
|
||||
; SI-NEXT: s_addc_u32 s13, s13, 0
|
||||
; SI-NEXT: s_mov_b32 s32, 0
|
||||
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6
|
||||
; SI-NEXT: ; implicit-def: $vgpr1
|
||||
; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; SI-NEXT: s_xor_b32 s6, exec_lo, s0
|
||||
@ -243,11 +243,11 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
|
||||
; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; SI-NEXT: s_mov_b32 s14, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v40, v1
|
||||
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
|
||||
; SI-NEXT: s_mov_b32 s15, 0x31c16000
|
||||
; SI-NEXT: s_add_u32 s12, s12, s1
|
||||
; SI-NEXT: s_addc_u32 s13, s13, 0
|
||||
; SI-NEXT: s_mov_b32 s32, 0
|
||||
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
|
||||
; SI-NEXT: ; implicit-def: $vgpr0
|
||||
; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; SI-NEXT: s_xor_b32 s6, exec_lo, s0
|
||||
|
@ -372,8 +372,8 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
|
||||
; GFX1032-NEXT: .LBB10_2: ; %bb2
|
||||
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1032-NEXT: v_cmp_ge_i32_e64 s4, v1, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0
|
||||
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v1, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s3, 0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB10_4
|
||||
; GFX1032-NEXT: ; %bb.3: ; %bb5
|
||||
@ -515,8 +515,8 @@ bb13:
|
||||
define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #0 {
|
||||
; GFX1032-LABEL: test_loop_with_if_else_break:
|
||||
; GFX1032: ; %bb.0: ; %bb
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GFX1032-NEXT: s_mov_b32 s2, 0
|
||||
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX1032-NEXT: s_cbranch_execz .LBB11_6
|
||||
; GFX1032-NEXT: ; %bb.1: ; %.preheader
|
||||
|
@ -416,10 +416,10 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, s8
|
||||
; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
|
||||
; GFX9-O3-NEXT: v_cndmask_b32_e64 v2, 0, v0, s[34:35]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX9-O3-NEXT: s_getpc_b64 s[36:37]
|
||||
; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
|
||||
; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2
|
||||
|
@ -426,12 +426,12 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[24:25]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
|
||||
; GFX9-O3-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-O3-NEXT: s_add_u32 s22, s22, called@rel32@lo+4
|
||||
; GFX9-O3-NEXT: s_addc_u32 s23, s23, called@rel32@hi+12
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
|
||||
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[22:23]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX9-O3-NEXT: v_add_u32_e32 v3, v3, v6
|
||||
@ -1278,12 +1278,12 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[0:1], s[24:25]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
|
||||
; GFX9-O3-NEXT: s_getpc_b64 s[22:23]
|
||||
; GFX9-O3-NEXT: s_add_u32 s22, s22, strict_wwm_called@rel32@lo+4
|
||||
; GFX9-O3-NEXT: s_addc_u32 s23, s23, strict_wwm_called@rel32@hi+12
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v31, v3
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[2:3], s[26:27]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
|
||||
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[22:23]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX9-O3-NEXT: v_add_u32_e32 v3, v3, v6
|
||||
|
@ -41,8 +41,8 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
|
||||
; ENABLE-NEXT: bhs .LBB0_6
|
||||
; ENABLE-NEXT: @ %bb.5: @ %while.body
|
||||
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
|
||||
; ENABLE-NEXT: cmp r0, r2
|
||||
; ENABLE-NEXT: mov r1, r3
|
||||
; ENABLE-NEXT: cmp r0, r2
|
||||
; ENABLE-NEXT: blo .LBB0_4
|
||||
; ENABLE-NEXT: .LBB0_6: @ %if.end29
|
||||
; ENABLE-NEXT: pop {r11, pc}
|
||||
@ -131,8 +131,8 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
|
||||
; DISABLE-NEXT: bhs .LBB0_6
|
||||
; DISABLE-NEXT: @ %bb.5: @ %while.body
|
||||
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
|
||||
; DISABLE-NEXT: cmp r0, r2
|
||||
; DISABLE-NEXT: mov r1, r3
|
||||
; DISABLE-NEXT: cmp r0, r2
|
||||
; DISABLE-NEXT: blo .LBB0_4
|
||||
; DISABLE-NEXT: .LBB0_6: @ %if.end29
|
||||
; DISABLE-NEXT: pop {r11, pc}
|
||||
|
@ -2017,8 +2017,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
|
||||
; ARM-DISABLE-NEXT: sub r4, sp, #24
|
||||
; ARM-DISABLE-NEXT: bfc r4, #0, #4
|
||||
; ARM-DISABLE-NEXT: mov sp, r4
|
||||
; ARM-DISABLE-NEXT: tst r2, #1
|
||||
; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
|
||||
; ARM-DISABLE-NEXT: tst r2, #1
|
||||
; ARM-DISABLE-NEXT: vstr d10, [r4, #16]
|
||||
; ARM-DISABLE-NEXT: beq LBB12_2
|
||||
; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3
|
||||
@ -2123,8 +2123,8 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t
|
||||
; THUMB-DISABLE-NEXT: sub.w r4, sp, #24
|
||||
; THUMB-DISABLE-NEXT: bfc r4, #0, #4
|
||||
; THUMB-DISABLE-NEXT: mov sp, r4
|
||||
; THUMB-DISABLE-NEXT: lsls r1, r2, #31
|
||||
; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128]
|
||||
; THUMB-DISABLE-NEXT: lsls r1, r2, #31
|
||||
; THUMB-DISABLE-NEXT: vstr d10, [r4, #16]
|
||||
; THUMB-DISABLE-NEXT: beq LBB12_2
|
||||
; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3
|
||||
|
@ -50,9 +50,9 @@ define void @test_pr22678() {
|
||||
define <4 x i32> @test_vmovrrd_combine() nounwind {
|
||||
; CHECK-LABEL: test_vmovrrd_combine:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: @ implicit-def: $q8
|
||||
; CHECK-NEXT: mov r0, #0
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: @ implicit-def: $q8
|
||||
; CHECK-NEXT: bne .LBB3_2
|
||||
; CHECK-NEXT: @ %bb.1: @ %bb1.preheader
|
||||
; CHECK-NEXT: vmov.i32 q8, #0x0
|
||||
|
@ -54,13 +54,13 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: # implicit-def: $r4
|
||||
; CHECK-NEXT: .LBB0_8: # %bb20
|
||||
; CHECK-NEXT: mfcr r12
|
||||
; CHECK-NEXT: cmpwi cr2, r3, -1
|
||||
; CHECK-NEXT: cmpwi cr3, r4, -1
|
||||
; CHECK-NEXT: cmpwi cr2, r3, -1
|
||||
; CHECK-NEXT: stw r12, 8(r1)
|
||||
; CHECK-NEXT: cmpwi cr7, r3, 0
|
||||
; CHECK-NEXT: cmpwi cr6, r4, 0
|
||||
; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
|
||||
; CHECK-NEXT: crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un
|
||||
; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
|
||||
; CHECK-NEXT: # implicit-def: $x3
|
||||
; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
|
||||
; CHECK-NEXT: # %bb.9: # %bb34
|
||||
@ -95,15 +95,15 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: lwz r7, 0(r3)
|
||||
; CHECK-NEXT: .LBB0_18: # %bb58
|
||||
; CHECK-NEXT: lwz r6, 92(r6)
|
||||
; CHECK-NEXT: cmpwi cr4, r7, 1
|
||||
; CHECK-NEXT: crand 4*cr7+un, 4*cr3+gt, 4*cr6+un
|
||||
; CHECK-NEXT: cmpwi cr3, r5, 1
|
||||
; CHECK-NEXT: cmpwi cr4, r7, 1
|
||||
; CHECK-NEXT: crand 4*cr7+gt, 4*cr7+eq, 4*cr1+lt
|
||||
; CHECK-NEXT: # implicit-def: $x5
|
||||
; CHECK-NEXT: crand 4*cr6+un, 4*cr2+eq, 4*cr6+un
|
||||
; CHECK-NEXT: crand 4*cr5+un, 4*cr6+eq, 4*cr5+un
|
||||
; CHECK-NEXT: crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt
|
||||
; CHECK-NEXT: crand 4*cr7+lt, 4*cr4+lt, 4*cr7+lt
|
||||
; CHECK-NEXT: crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt
|
||||
; CHECK-NEXT: cmpwi r6, 1
|
||||
; CHECK-NEXT: crand 4*cr6+lt, lt, 4*cr6+lt
|
||||
; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_20
|
||||
|
@ -24,8 +24,8 @@ define arm_aapcs_vfpcc void @fast_float_mul(ptr nocapture %a, ptr nocapture read
|
||||
; CHECK-NEXT: cmpeq.w r12, #0
|
||||
; CHECK-NEXT: beq .LBB0_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r4, r3, #1
|
||||
; CHECK-NEXT: and r12, r3, #3
|
||||
; CHECK-NEXT: subs r4, r3, #1
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: bhs .LBB0_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
|
@ -10,8 +10,8 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
|
||||
; CHECK-NEXT: subs.w r9, r1, #1
|
||||
; CHECK-NEXT: beq .LBB0_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
|
||||
; CHECK-NEXT: subs r7, r1, #2
|
||||
; CHECK-NEXT: and r8, r9, #3
|
||||
; CHECK-NEXT: subs r7, r1, #2
|
||||
; CHECK-NEXT: cmp r7, #3
|
||||
; CHECK-NEXT: bhs .LBB0_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
|
@ -1411,8 +1411,8 @@ define arm_aapcs_vfpcc float @half_half_mac(ptr nocapture readonly %a, ptr nocap
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: cbz r2, .LBB9_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r12, r2, #3
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB9_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
@ -1566,8 +1566,8 @@ define arm_aapcs_vfpcc float @half_half_acc(ptr nocapture readonly %a, ptr nocap
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: cbz r2, .LBB10_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r12, r2, #3
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB10_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
@ -1721,8 +1721,8 @@ define arm_aapcs_vfpcc float @half_short_mac(ptr nocapture readonly %a, ptr noca
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: cbz r2, .LBB11_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r12, r2, #3
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB11_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
|
@ -348,8 +348,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly
|
||||
; CHECK-NEXT: cmpeq r7, #0
|
||||
; CHECK-NEXT: beq .LBB5_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: and r12, r4, #3
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: cmp r7, #3
|
||||
; CHECK-NEXT: bhs .LBB5_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
@ -624,8 +624,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonl
|
||||
; CHECK-NEXT: cmpeq r7, #0
|
||||
; CHECK-NEXT: beq .LBB7_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: and r12, r4, #3
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: cmp r7, #3
|
||||
; CHECK-NEXT: bhs .LBB7_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
@ -900,8 +900,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly
|
||||
; CHECK-NEXT: cmpeq r7, #0
|
||||
; CHECK-NEXT: beq .LBB9_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: and r12, r4, #3
|
||||
; CHECK-NEXT: subs r7, r4, #1
|
||||
; CHECK-NEXT: cmp r7, #3
|
||||
; CHECK-NEXT: bhs .LBB9_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
|
@ -446,8 +446,8 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(ptr nocapture read
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: vdup.32 q0, r3
|
||||
; CHECK-NEXT: movs r3, #1
|
||||
; CHECK-NEXT: add.w r3, r3, r7, lsr #2
|
||||
; CHECK-NEXT: vmov.32 q0[0], r12
|
||||
; CHECK-NEXT: add.w r3, r3, r7, lsr #2
|
||||
; CHECK-NEXT: dls lr, r3
|
||||
; CHECK-NEXT: .LBB6_5: @ %vector.body46
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -1060,12 +1060,12 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
|
||||
; CHECK-NEXT: vfma.f32 q0, q4, r5
|
||||
; CHECK-NEXT: vldrw.u32 q3, [r4, #-8]
|
||||
; CHECK-NEXT: vfma.f32 q0, q5, r6
|
||||
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, lr
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r4, #-4]
|
||||
; CHECK-NEXT: vfma.f32 q0, q2, lr
|
||||
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: vfma.f32 q0, q3, r11
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r8
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: blo .LBB16_9
|
||||
; CHECK-NEXT: @ %bb.7: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1
|
||||
@ -1603,8 +1603,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
|
||||
; CHECK-NEXT: .LBB19_3: @ %do.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB19_5 Depth 2
|
||||
; CHECK-NEXT: mov r6, r2
|
||||
; CHECK-NEXT: ldrd r5, r11, [r9]
|
||||
; CHECK-NEXT: mov r6, r2
|
||||
; CHECK-NEXT: ldrd r8, r10, [r9, #8]
|
||||
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
|
||||
|
@ -1376,8 +1376,8 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
|
||||
; CHECK-NEXT: bne .LBB16_3
|
||||
; CHECK-NEXT: @ %bb.4: @ %middle.block
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_2 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr.w r9, [sp, #52] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r9, r1
|
||||
; CHECK-NEXT: bne .LBB16_2
|
||||
; CHECK-NEXT: .LBB16_5: @ %for.cond.cleanup
|
||||
|
@ -712,8 +712,8 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n
|
||||
; CHECK-NEXT: @ %bb.12: @ %middle.block
|
||||
; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2
|
||||
; CHECK-NEXT: vaddv.u32 r10, q4
|
||||
; CHECK-NEXT: cmp r2, r12
|
||||
; CHECK-NEXT: mov r4, r2
|
||||
; CHECK-NEXT: cmp r2, r12
|
||||
; CHECK-NEXT: beq .LBB10_7
|
||||
; CHECK-NEXT: .LBB10_13: @ %for.body8.us.us.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2
|
||||
|
@ -180,9 +180,9 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef
|
||||
; CHECK-NEXT: @ in Loop: Header=BB4_4 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r2, r9, r10
|
||||
; CHECK-NEXT: sub.w r5, r8, r9
|
||||
; CHECK-NEXT: add.w r7, r1, r9, lsl #1
|
||||
; CHECK-NEXT: add.w r2, r1, r2, lsl #1
|
||||
; CHECK-NEXT: sub.w r5, r8, r9
|
||||
; CHECK-NEXT: dlstp.32 lr, r5
|
||||
; CHECK-NEXT: .LBB4_11: @ %vec.epilog.vector.body
|
||||
; CHECK-NEXT: @ Parent Loop BB4_4 Depth=1
|
||||
|
@ -258,11 +258,11 @@ define void @DCT_mve3(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
|
||||
; CHECK-NEXT: @ Child Loop BB2_3 Depth 2
|
||||
; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: adds r0, r5, #2
|
||||
; CHECK-NEXT: adds r2, r5, #1
|
||||
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r3, r9
|
||||
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
|
||||
; CHECK-NEXT: mov r0, r12
|
||||
; CHECK-NEXT: mov r4, r10
|
||||
; CHECK-NEXT: vmov q2, q0
|
||||
@ -618,13 +618,13 @@ define void @DCT_mve5(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
|
||||
; CHECK-NEXT: adds r1, r0, #4
|
||||
; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r10, r0, #2
|
||||
; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r1, r0, #3
|
||||
; CHECK-NEXT: add.w r10, r0, #2
|
||||
; CHECK-NEXT: add.w r11, r0, #1
|
||||
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
|
||||
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r3, r8
|
||||
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
|
||||
; CHECK-NEXT: vmov q0, q1
|
||||
; CHECK-NEXT: vmov q3, q1
|
||||
; CHECK-NEXT: vmov q2, q1
|
||||
@ -833,8 +833,8 @@ define void @DCT_mve6(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r11, r0, #2
|
||||
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: adds r4, r0, #1
|
||||
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r3, r8
|
||||
; CHECK-NEXT: vmov q3, q1
|
||||
; CHECK-NEXT: vmov q4, q1
|
||||
@ -1068,8 +1068,8 @@ define void @DCT_mve7(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
|
||||
; CHECK-NEXT: vmov.i32 q2, #0x0
|
||||
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: adds r4, r0, #2
|
||||
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r8, r0, #1
|
||||
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r3, r9
|
||||
; CHECK-NEXT: vmov q4, q2
|
||||
; CHECK-NEXT: vmov q5, q2
|
||||
@ -1347,11 +1347,11 @@ define void @DCT_mve8(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
|
||||
; CHECK-NEXT: adds r1, r0, #4
|
||||
; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: vmov.i32 q3, #0x0
|
||||
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: adds r4, r0, #3
|
||||
; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
|
||||
; CHECK-NEXT: adds r4, r0, #3
|
||||
; CHECK-NEXT: add.w r8, r0, #2
|
||||
; CHECK-NEXT: adds r1, r0, #1
|
||||
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: mov r3, r12
|
||||
; CHECK-NEXT: vmov q5, q3
|
||||
; CHECK-NEXT: vmov q6, q3
|
||||
|
@ -100,8 +100,8 @@ define void @arm_cmplx_dot_prod_q15(ptr nocapture readonly %pSrcA, ptr nocapture
|
||||
; CHECK-NEXT: ldr.w r8, [sp, #36]
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: mov r5, r7
|
||||
; CHECK-NEXT: and r2, r2, #3
|
||||
; CHECK-NEXT: lsrl r6, r5, #6
|
||||
; CHECK-NEXT: and r2, r2, #3
|
||||
; CHECK-NEXT: wls lr, r2, .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_5: @ %while.body11
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -708,14 +708,14 @@ define ptr @signext(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: .LBB5_4: @ %for.cond.cleanup23
|
||||
; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: ldr r1, [sp, #100]
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: add r0, r6
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: strb.w r0, [r1, r11]
|
||||
; CHECK-NEXT: add.w r11, r11, #1
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r11, r0
|
||||
; CHECK-NEXT: beq .LBB5_8
|
||||
; CHECK-NEXT: .LBB5_5: @ %for.body
|
||||
@ -933,14 +933,14 @@ define ptr @signext_optsize(ptr %input_row, ptr %input_col, i16 zeroext %output_
|
||||
; CHECK-NEXT: mov r6, r12
|
||||
; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup23
|
||||
; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: ldr r1, [sp, #100]
|
||||
; CHECK-NEXT: add.w r0, r8, r10
|
||||
; CHECK-NEXT: add r0, r6
|
||||
; CHECK-NEXT: add r0, r12
|
||||
; CHECK-NEXT: strb.w r0, [r1, r11]
|
||||
; CHECK-NEXT: add.w r11, r11, #1
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: cmp r11, r0
|
||||
; CHECK-NEXT: bne .LBB6_3
|
||||
; CHECK-NEXT: .LBB6_8: @ %if.end
|
||||
|
@ -11,8 +11,8 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResul
|
||||
; CHECK-NEXT: vidup.u32 q2, r4, #1
|
||||
; CHECK-NEXT: movw r5, #54437
|
||||
; CHECK-NEXT: movt r5, #21352
|
||||
; CHECK-NEXT: vdup.32 q1, r5
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: vdup.32 q1, r5
|
||||
; CHECK-NEXT: dlstp.32 lr, r1
|
||||
; CHECK-NEXT: .LBB0_1: @ %do.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -11,10 +11,10 @@ define arm_aapcs_vfpcc void @start12(ptr nocapture readonly %x, ptr nocapture re
|
||||
; CHECK-NEXT: poplt {r4, pc}
|
||||
; CHECK-NEXT: .LBB0_1: @ %vector.ph
|
||||
; CHECK-NEXT: vmov r12, s0
|
||||
; CHECK-NEXT: subs r3, #12
|
||||
; CHECK-NEXT: adds r0, #48
|
||||
; CHECK-NEXT: adds r1, #48
|
||||
; CHECK-NEXT: adds r2, #48
|
||||
; CHECK-NEXT: subs r3, #12
|
||||
; CHECK-NEXT: dlstp.32 lr, r3
|
||||
; CHECK-NEXT: .LBB0_2: @ %vector.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
|
@ -42,9 +42,9 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: add r0, sp, #28
|
||||
; CHECK-NEXT: movs r5, #0
|
||||
; CHECK-NEXT: cmp r4, #1
|
||||
; CHECK-NEXT: stm r0!, {r1, r2, r3}
|
||||
; CHECK-NEXT: add r0, sp, #28
|
||||
; CHECK-NEXT: cmp r4, #1
|
||||
; CHECK-NEXT: str r0, [sp, #4]
|
||||
; CHECK-NEXT: blt .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_1: @ %for.body
|
||||
|
@ -46,8 +46,8 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: cmp r5, #1
|
||||
; CHECK-NEXT: blt .LBB0_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r0, r5, #1
|
||||
; CHECK-NEXT: and r12, r5, #3
|
||||
; CHECK-NEXT: subs r0, r5, #1
|
||||
; CHECK-NEXT: cmp r0, #3
|
||||
; CHECK-NEXT: bhs .LBB0_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
|
@ -58,8 +58,8 @@ define void @double_foobar() {
|
||||
; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: movs r0, #2
|
||||
; CHECK-NEXT: str r0, [r1]
|
||||
; CHECK-NEXT: add r1, sp, #4
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: add r1, sp, #4
|
||||
; CHECK-NEXT: ldr r0, [r1, #8]
|
||||
; CHECK-NEXT: mov sp, r0
|
||||
; CHECK-NEXT: ldr r0, [r1, #4]
|
||||
|
@ -23,9 +23,9 @@ define i32 @simple(ptr %a, ptr %b, i32 %x) nounwind {
|
||||
; A9-NEXT: add.w r4, lr, r2
|
||||
; A9-NEXT: ldr.w r6, [lr, r2]
|
||||
; A9-NEXT: add r0, r3
|
||||
; A9-NEXT: adds r3, r4, r2
|
||||
; A9-NEXT: add r0, r12
|
||||
; A9-NEXT: ldr r5, [r4, r2]
|
||||
; A9-NEXT: add r0, r12
|
||||
; A9-NEXT: adds r3, r4, r2
|
||||
; A9-NEXT: add r0, r6
|
||||
; A9-NEXT: add r3, r2
|
||||
; A9-NEXT: add r0, r5
|
||||
|
Loading…
x
Reference in New Issue
Block a user