llvm-project/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
Diana Picus ac005e16f6
Reapply "[AMDGPU] Intrinsic for launching whole wave functions" (#153584)
This reverts commit 14cd1339318b16e08c1363ec6896bd7d1e4ae281. The
buildbot failure seems to have been a cmake issue which has been
discussed in more detail in this Discourse post:

https://discourse.llvm.org/t/cmake-doesnt-regenerate-all-tablegen-target-files/87901

If any buildbots fail to select arbitrary intrinsics with this patch,
it's worth considering using clean builds with ccache instead of
incremental builds, as recommended here:

https://llvm.org/docs/HowToAddABuilder.html#:~:text=Use%20CCache%20and%20NOT%20incremental%20builds

The original commit message for this patch:
Add the llvm.amdgcn.call.whole.wave intrinsic for calling whole wave
functions. This will take as its first argument the callee with the
amdgpu_gfx_whole_wave calling convention, followed by the call
parameters which must match the signature of the callee except for the
first function argument (the i1 original EXEC mask, which doesn't need
to be passed in). Indirect calls are not allowed.

Make direct calls to amdgpu_gfx_whole_wave functions a verifier error.

Tail calls are handled in a future patch.
2025-08-15 10:12:47 +02:00

3839 lines
203 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=DAGISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL64 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL64 %s
; Basic whole-wave function test.
; Make sure the i1 %active is passed through EXEC rather than in a register.
; The EXEC mask should be set to -1 for the duration of the function
; and restored to its original value in the epilogue. In the expected code the
; incoming EXEC is kept in VCC (s_xor_saveexec) and moved back to EXEC right
; before the return.
; We will also need to restore the inactive lanes for any allocated VGPRs:
; v0 and v1 are stored to scratch while EXEC is inverted (only the lanes that
; were inactive on entry are enabled) and reloaded under the same inverted mask
; in the epilogue.
define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: basic_test:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: basic_test:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: basic_test:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: basic_test:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Lanes for which %active is false get the constants 5 and 3 in place of %a and
; %b; both selects are expected to become v_cndmask on the saved mask in VCC.
%x = select i1 %active, i32 %a, i32 5
%y = select i1 %active, i32 %b, i32 3
; The three i32 1 operands show up in the expected output as
; quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 on the v_mov_b32_dpp.
%ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
ret i32 %ret
}
; Make sure we don't crash if there's only one use for %active.
; Only %y is selected on %active (constant 17 where it is false), so the
; expected code has a single v_cndmask on VCC; %a is passed to the DPP
; intrinsic unmodified. v0 and v1 are still saved and restored around the body
; while EXEC is inverted.
define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: single_use_of_active:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc_lo
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: single_use_of_active:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc_lo
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: single_use_of_active:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: single_use_of_active:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; The only use of %active: lanes where it is false get 17 instead of %b.
%y = select i1 %active, i32 %b, i32 17
%ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a, i32 %y, i32 1, i32 1, i32 1, i1 false)
ret i32 %ret
}
; Make sure we don't crash if %active is not used at all.
; The function only returns the constant 14. The expected code still switches
; EXEC to -1 for the body, keeping the entry EXEC in s0 (wave32) or s[0:1]
; (wave64) instead of VCC, and saves/reloads v0 while EXEC is inverted because
; v0 is overwritten with the constant.
define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: unused_active:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: v_mov_b32_e32 v0, 14
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: unused_active:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 s0, -1
; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: v_mov_b32_e32 v0, 14
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b32 exec_lo, s0
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: unused_active:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: v_mov_b32_e32 v0, 14
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: unused_active:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: v_mov_b32_e32 v0, 14
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; None of the arguments are read; the result is a plain constant.
ret i32 14
}
; For any used VGPRs (including those used for SGPR spills), we need to restore the inactive lanes.
; For CSR VGPRs, we need to restore all lanes.
; In the expected code, v0, v1, v49 and v2 (the VGPR that receives s20 via
; v_writelane) are stored and reloaded while EXEC is inverted, i.e. only for
; the lanes that were inactive on entry. v40 is spilled and reloaded while EXEC
; is -1, i.e. for all lanes.
define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: csr:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_store_b32 off, v2, s32
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber CSR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber non-CSR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; DAGISEL-NEXT: v_readlane_b32 s20, v2, 0
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_load_b32 v2, off, s32
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 offset:4
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:8
; DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:16
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_wait_alu 0xf1ff
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csr:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_store_b32 off, v2, s32
; GISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8
; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber CSR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: v_writelane_b32 v2, s20, 0
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber non-CSR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; GISEL-NEXT: v_readlane_b32 s20, v2, 0
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_load_b32 v2, off, s32
; GISEL-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:8
; GISEL-NEXT: scratch_load_b32 v49, off, s32 offset:16
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_wait_alu 0xf1ff
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: csr:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber CSR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber non-CSR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; DAGISEL64-NEXT: v_readlane_b32 s20, v2, 0
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:16
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_wait_alu 0xf1ff
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: csr:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_store_b32 off, v2, s32
; GISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber CSR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: v_writelane_b32 v2, s20, 0
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber non-CSR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; GISEL64-NEXT: v_readlane_b32 s20, v2, 0
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_load_b32 v2, off, s32
; GISEL64-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:8
; GISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:16
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_wait_alu 0xf1ff
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Lanes for which %active is false get the constants 5 and 3.
%x = select i1 %active, i32 %a, i32 5
%y = select i1 %active, i32 %b, i32 3
; The first asm clobbers v40 and s48, the second clobbers v49 and s20. The
; expected output contains save/restore code for v40, v49 and s20 and none
; for s48.
; NOTE(review): the "CSR"/"non-CSR" asm strings are just markers that show up
; between ASMSTART/ASMEND in the output; confirm against the calling convention
; which of these registers are actually callee-saved.
call void asm sideeffect "; clobber CSR", "~{v40},~{s48}"()
call void asm sideeffect "; clobber non-CSR", "~{v49},~{s20}"()
%ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
ret i32 %ret
}
; Save and restore all lanes of v40.
; v40 is the only register clobbered. The expected code uses s_or_saveexec
; (not s_xor_saveexec) with -1, keeping the entry EXEC in s0 / s[0:1]; the v40
; spill and reload both sit between that instruction and the final move of the
; saved mask back into EXEC. No other VGPR is saved.
define amdgpu_gfx_whole_wave void @csr_vgpr_only(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: csr_vgpr_only:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber CSR VGPR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csr_vgpr_only:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_or_saveexec_b32 s0, -1
; GISEL-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber CSR VGPR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_mov_b32 exec_lo, s0
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: csr_vgpr_only:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber CSR VGPR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: csr_vgpr_only:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL64-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber CSR VGPR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Empty asm body; the "~{v40}" clobber is what forces the save/restore.
call void asm sideeffect "; clobber CSR VGPR", "~{v40}"()
ret void
}
; Clobber only an SGPR (s68).
; The expected code preserves s68 by writing it to lane 0 of v0 (v_writelane)
; before the asm and reading it back (v_readlane) afterwards. v0 itself is
; stored and reloaded while EXEC is inverted, i.e. for the lanes that were
; inactive on entry; the entry EXEC is kept in s0 (wave32) or s[0:1] (wave64).
define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: sgpr_spill_only:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: v_writelane_b32 v0, s68, 0
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber CSR SGPR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_readlane_b32 s68, v0, 0
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: sgpr_spill_only:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 s0, -1
; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: v_writelane_b32 v0, s68, 0
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber CSR SGPR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_readlane_b32 s68, v0, 0
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b32 exec_lo, s0
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: sgpr_spill_only:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: v_writelane_b32 v0, s68, 0
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber CSR SGPR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_readlane_b32 s68, v0, 0
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: sgpr_spill_only:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: v_writelane_b32 v0, s68, 0
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber CSR SGPR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_readlane_b32 s68, v0, 0
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Empty asm body; the "~{s68}" clobber is what forces the spill to a VGPR lane.
call void asm sideeffect "; clobber CSR SGPR", "~{s68}"()
ret void
}
; Whole-wave function with control flow (three IR blocks).
; The expected code keeps the entry EXEC in VCC for the whole function and
; contains no branch instructions: inside the EXEC = -1 region, EXEC is copied
; to s1 (wave32) or s[2:3] (wave64), v_cmpx_eq_u32 is issued for the %if.then
; block, and s_or puts the copied mask back at %if.end. The final select on
; %active is a v_cndmask on VCC, followed by the usual inverted-EXEC reload of
; v0 and v1.
define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: multiple_blocks:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-NEXT: s_mov_b32 s1, exec_lo
; DAGISEL-NEXT: v_cmpx_eq_u32_e64 v0, v1
; DAGISEL-NEXT: ; %bb.1: ; %if.then
; DAGISEL-NEXT: v_add_nc_u32_e32 v1, v0, v1
; DAGISEL-NEXT: ; %bb.2: ; %if.end
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: multiple_blocks:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-NEXT: s_mov_b32 s1, exec_lo
; GISEL-NEXT: v_cmpx_eq_u32_e64 v0, v1
; GISEL-NEXT: ; %bb.1: ; %if.then
; GISEL-NEXT: v_add_nc_u32_e32 v1, v0, v1
; GISEL-NEXT: ; %bb.2: ; %if.end
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: multiple_blocks:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL64-NEXT: s_mov_b64 s[2:3], exec
; DAGISEL64-NEXT: v_cmpx_eq_u32_e64 v0, v1
; DAGISEL64-NEXT: ; %bb.1: ; %if.then
; DAGISEL64-NEXT: v_add_nc_u32_e32 v1, v0, v1
; DAGISEL64-NEXT: ; %bb.2: ; %if.end
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_or_b64 exec, exec, s[2:3]
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: multiple_blocks:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL64-NEXT: s_mov_b64 s[2:3], exec
; GISEL64-NEXT: v_cmpx_eq_u32_e64 v0, v1
; GISEL64-NEXT: ; %bb.1: ; %if.then
; GISEL64-NEXT: v_add_nc_u32_e32 v1, v0, v1
; GISEL64-NEXT: ; %bb.2: ; %if.end
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_or_b64 exec, exec, s[2:3]
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Per-lane condition: take %if.then only where %a == %b.
%c = icmp eq i32 %a, %b
br i1 %c, label %if.then, label %if.end
if.then: ; preds = %0
%d = add i32 %a, %b
br label %if.end
if.end:
; %f is %a + %b when coming from %if.then and %b when coming from the entry block.
%f = phi i32 [ %d, %if.then ], [ %b, %0 ]
; Lanes for which %active is true return %a; the others return %f.
%e = select i1 %active, i32 %a, i32 %f
ret i32 %e
}
; ret_64: whole wave function that takes and returns 64-bit values.
; Lanes where %active is true use %a and %b; all other lanes use the constants
; 5 and 3 instead. The two selected values are then passed to
; llvm.amdgcn.update.dpp.i64 and the result is returned.
; What the expected output below shows:
;  * each i64 is held in a pair of 32-bit VGPRs (%a in v0/v1, %b in v2/v3), so
;    the two selects become four v_cndmask_b32 (constants split as 5/0 and 3/0)
;    and the DPP update becomes two v_mov_b32_dpp;
;  * the entry s_xor_saveexec writes vcc (vcc_lo for wave32), and the wave64
;    v_cndmask_b32 instructions read vcc as their condition;
;  * v0-v3 are stored to scratch right after s_xor_saveexec and reloaded after
;    "s_xor exec, vcc, -1", before EXEC is finally set back to vcc.
define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
; DAGISEL-LABEL: ret_64:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_dual_cndmask_b32 v1, 0, v1 :: v_dual_cndmask_b32 v0, 5, v0
; DAGISEL-NEXT: v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: ret_64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 0, v1
; GISEL-NEXT: v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GISEL-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: ret_64:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v2, 3, v2, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: ret_64:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v2, 3, v2, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GISEL64-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8
; GISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
; Lanes with %active set keep the incoming arguments; the others get 5 and 3.
%x = select i1 %active, i64 %a, i64 5
%y = select i1 %active, i64 %b, i64 3
; The three "i32 1" operands appear in the expected output as
; quad_perm:[1,0,0,0], row_mask:0x1 and bank_mask:0x1 on each v_mov_b32_dpp.
%ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
ret i64 %ret
}
; inreg_args: whole wave function whose arguments, apart from %active, are all
; marked inreg. The body stores %i32 through %ptr, then %v4i32 and %float
; through %ptr2; %active is never read and nothing is returned.
; What the expected output below shows:
;  * the inreg arguments are read from SGPRs: %i32 from s4, %v4i32 from s5-s8,
;    %float from s9, %ptr from s10 and %ptr2 from s11; the values are copied
;    into VGPRs before each scratch_store;
;  * the entry s_xor_saveexec writes an SGPR (s0 or s[0:1] with SelectionDAG,
;    s34 or s[34:35] with GlobalISel), and EXEC is set back from that register
;    just before the return;
;  * v0-v5, the VGPRs used for those copies, are stored to scratch on entry and
;    reloaded before the return.
define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i32> inreg %v4i32, float inreg %float, ptr addrspace(5) inreg %ptr, ptr addrspace(5) inreg %ptr2) {
; DAGISEL-LABEL: inreg_args:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
; DAGISEL-NEXT: s_clause 0x5
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9
; DAGISEL-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
; DAGISEL-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8
; DAGISEL-NEXT: scratch_store_b32 off, v4, s10
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b128 off, v[0:3], s11
; DAGISEL-NEXT: scratch_store_b32 off, v5, s11
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; DAGISEL-NEXT: s_clause 0x5
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: inreg_args:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 s34, -1
; GISEL-NEXT: s_clause 0x5
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_mov_b32 s0, s5
; GISEL-NEXT: s_mov_b32 s1, s6
; GISEL-NEXT: s_mov_b32 s2, s7
; GISEL-NEXT: s_mov_b32 s3, s8
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
; GISEL-NEXT: v_mov_b32_e32 v5, s9
; GISEL-NEXT: scratch_store_b32 off, v4, s10
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b128 off, v[0:3], s11
; GISEL-NEXT: scratch_store_b32 off, v5, s11
; GISEL-NEXT: s_xor_b32 exec_lo, s34, -1
; GISEL-NEXT: s_clause 0x5
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
; GISEL-NEXT: s_mov_b32 exec_lo, s34
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: inreg_args:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; DAGISEL64-NEXT: s_clause 0x5
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12
; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16
; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: v_mov_b32_e32 v4, s4
; DAGISEL64-NEXT: v_mov_b32_e32 v0, s5
; DAGISEL64-NEXT: v_mov_b32_e32 v1, s6
; DAGISEL64-NEXT: v_mov_b32_e32 v2, s7
; DAGISEL64-NEXT: v_mov_b32_e32 v3, s8
; DAGISEL64-NEXT: v_mov_b32_e32 v5, s9
; DAGISEL64-NEXT: scratch_store_b32 off, v4, s10
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b128 off, v[0:3], s11
; DAGISEL64-NEXT: scratch_store_b32 off, v5, s11
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; DAGISEL64-NEXT: s_clause 0x5
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12
; DAGISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16
; DAGISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20
; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: inreg_args:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GISEL64-NEXT: s_clause 0x5
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12
; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16
; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_mov_b32 s0, s5
; GISEL64-NEXT: s_mov_b32 s1, s6
; GISEL64-NEXT: s_mov_b32 s2, s7
; GISEL64-NEXT: s_mov_b32 s3, s8
; GISEL64-NEXT: v_mov_b32_e32 v4, s4
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_mov_b32_e32 v0, s0
; GISEL64-NEXT: v_mov_b32_e32 v1, s1
; GISEL64-NEXT: v_mov_b32_e32 v2, s2
; GISEL64-NEXT: v_mov_b32_e32 v3, s3
; GISEL64-NEXT: v_mov_b32_e32 v5, s9
; GISEL64-NEXT: scratch_store_b32 off, v4, s10
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b128 off, v[0:3], s11
; GISEL64-NEXT: scratch_store_b32 off, v5, s11
; GISEL64-NEXT: s_xor_b64 exec, s[34:35], -1
; GISEL64-NEXT: s_clause 0x5
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8
; GISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12
; GISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16
; GISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20
; GISEL64-NEXT: s_mov_b64 exec, s[34:35]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
store i32 %i32, ptr addrspace(5) %ptr
; Both stores below go through %ptr2, so the 4-byte float store lands on the
; first element written by the <4 x i32> store.
; Review note: it is not clear from this file whether sharing %ptr2 is
; intentional. The expected output above encodes it (both stores use s11), so
; changing either pointer means regenerating the assertions.
store <4 x i32> %v4i32, ptr addrspace(5) %ptr2
store float %float, ptr addrspace(5) %ptr2
ret void
}
; External function with the amdgpu_gfx calling convention. Its address is
; materialized (gfx_callee@abs32@lo/hi) in the expected output of
; call_gfx_from_whole_wave.
declare amdgpu_gfx <2 x half> @gfx_callee(<2 x half> %x, <2 x half> %y)
define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) {
; DAGISEL-LABEL: call_gfx_from_whole_wave:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_mov_b32 s0, s33
; DAGISEL-NEXT: s_mov_b32 s33, s32
; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12
; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16
; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20
; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24
; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28
; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32
; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36
; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40
; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44
; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48
; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52
; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56
; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60
; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64
; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68
; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72
; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76
; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80
; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84
; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88
; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92
; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96
; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100
; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104
; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108
; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112
; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116
; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120
; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124
; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132
; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136
; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140
; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144
; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148
; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152
; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156
; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160
; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164
; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168
; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172
; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176
; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180
; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184
; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188
; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192
; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196
; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200
; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204
; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208
; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212
; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216
; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220
; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224
; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228
; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232
; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236
; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240
; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244
; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248
; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252
; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260
; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264
; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268
; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272
; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276
; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280
; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284
; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288
; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292
; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296
; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300
; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304
; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308
; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312
; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316
; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320
; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324
; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328
; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332
; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336
; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340
; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344
; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348
; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352
; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356
; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360
; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364
; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368
; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372
; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376
; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380
; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388
; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392
; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396
; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400
; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404
; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408
; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412
; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416
; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420
; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424
; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428
; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432
; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436
; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440
; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444
; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448
; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452
; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456
; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460
; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464
; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468
; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472
; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476
; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480
; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484
; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488
; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492
; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496
; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500
; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504
; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508
; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512
; DAGISEL-NEXT: s_clause 0xf
; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516
; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520
; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524
; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528
; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532
; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536
; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540
; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544
; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548
; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552
; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556
; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560
; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564
; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568
; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572
; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_writelane_b32 v40, s0, 3
; DAGISEL-NEXT: v_mov_b32_e32 v2, v0
; DAGISEL-NEXT: v_swap_b32 v0, v1
; DAGISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi
; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0
; DAGISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo
; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250
; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1
; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2
; DAGISEL-NEXT: v_readlane_b32 s30, v40, 1
; DAGISEL-NEXT: v_readlane_b32 s4, v40, 0
; DAGISEL-NEXT: v_readlane_b32 s0, v40, 3
; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_mov_b32 s32, s33
; DAGISEL-NEXT: s_xor_b32 exec_lo, s4, -1
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4
; DAGISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8
; DAGISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12
; DAGISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16
; DAGISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20
; DAGISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24
; DAGISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28
; DAGISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32
; DAGISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36
; DAGISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40
; DAGISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44
; DAGISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48
; DAGISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52
; DAGISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56
; DAGISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60
; DAGISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64
; DAGISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68
; DAGISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72
; DAGISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76
; DAGISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80
; DAGISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84
; DAGISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88
; DAGISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92
; DAGISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96
; DAGISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100
; DAGISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104
; DAGISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108
; DAGISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112
; DAGISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116
; DAGISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120
; DAGISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124
; DAGISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132
; DAGISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136
; DAGISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140
; DAGISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144
; DAGISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148
; DAGISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152
; DAGISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156
; DAGISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160
; DAGISEL-NEXT: scratch_load_b32 v48, off, s33 offset:164
; DAGISEL-NEXT: scratch_load_b32 v49, off, s33 offset:168
; DAGISEL-NEXT: scratch_load_b32 v50, off, s33 offset:172
; DAGISEL-NEXT: scratch_load_b32 v51, off, s33 offset:176
; DAGISEL-NEXT: scratch_load_b32 v52, off, s33 offset:180
; DAGISEL-NEXT: scratch_load_b32 v53, off, s33 offset:184
; DAGISEL-NEXT: scratch_load_b32 v54, off, s33 offset:188
; DAGISEL-NEXT: scratch_load_b32 v55, off, s33 offset:192
; DAGISEL-NEXT: scratch_load_b32 v64, off, s33 offset:196
; DAGISEL-NEXT: scratch_load_b32 v65, off, s33 offset:200
; DAGISEL-NEXT: scratch_load_b32 v66, off, s33 offset:204
; DAGISEL-NEXT: scratch_load_b32 v67, off, s33 offset:208
; DAGISEL-NEXT: scratch_load_b32 v68, off, s33 offset:212
; DAGISEL-NEXT: scratch_load_b32 v69, off, s33 offset:216
; DAGISEL-NEXT: scratch_load_b32 v70, off, s33 offset:220
; DAGISEL-NEXT: scratch_load_b32 v71, off, s33 offset:224
; DAGISEL-NEXT: scratch_load_b32 v80, off, s33 offset:228
; DAGISEL-NEXT: scratch_load_b32 v81, off, s33 offset:232
; DAGISEL-NEXT: scratch_load_b32 v82, off, s33 offset:236
; DAGISEL-NEXT: scratch_load_b32 v83, off, s33 offset:240
; DAGISEL-NEXT: scratch_load_b32 v84, off, s33 offset:244
; DAGISEL-NEXT: scratch_load_b32 v85, off, s33 offset:248
; DAGISEL-NEXT: scratch_load_b32 v86, off, s33 offset:252
; DAGISEL-NEXT: scratch_load_b32 v87, off, s33 offset:256
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v96, off, s33 offset:260
; DAGISEL-NEXT: scratch_load_b32 v97, off, s33 offset:264
; DAGISEL-NEXT: scratch_load_b32 v98, off, s33 offset:268
; DAGISEL-NEXT: scratch_load_b32 v99, off, s33 offset:272
; DAGISEL-NEXT: scratch_load_b32 v100, off, s33 offset:276
; DAGISEL-NEXT: scratch_load_b32 v101, off, s33 offset:280
; DAGISEL-NEXT: scratch_load_b32 v102, off, s33 offset:284
; DAGISEL-NEXT: scratch_load_b32 v103, off, s33 offset:288
; DAGISEL-NEXT: scratch_load_b32 v112, off, s33 offset:292
; DAGISEL-NEXT: scratch_load_b32 v113, off, s33 offset:296
; DAGISEL-NEXT: scratch_load_b32 v114, off, s33 offset:300
; DAGISEL-NEXT: scratch_load_b32 v115, off, s33 offset:304
; DAGISEL-NEXT: scratch_load_b32 v116, off, s33 offset:308
; DAGISEL-NEXT: scratch_load_b32 v117, off, s33 offset:312
; DAGISEL-NEXT: scratch_load_b32 v118, off, s33 offset:316
; DAGISEL-NEXT: scratch_load_b32 v119, off, s33 offset:320
; DAGISEL-NEXT: scratch_load_b32 v128, off, s33 offset:324
; DAGISEL-NEXT: scratch_load_b32 v129, off, s33 offset:328
; DAGISEL-NEXT: scratch_load_b32 v130, off, s33 offset:332
; DAGISEL-NEXT: scratch_load_b32 v131, off, s33 offset:336
; DAGISEL-NEXT: scratch_load_b32 v132, off, s33 offset:340
; DAGISEL-NEXT: scratch_load_b32 v133, off, s33 offset:344
; DAGISEL-NEXT: scratch_load_b32 v134, off, s33 offset:348
; DAGISEL-NEXT: scratch_load_b32 v135, off, s33 offset:352
; DAGISEL-NEXT: scratch_load_b32 v144, off, s33 offset:356
; DAGISEL-NEXT: scratch_load_b32 v145, off, s33 offset:360
; DAGISEL-NEXT: scratch_load_b32 v146, off, s33 offset:364
; DAGISEL-NEXT: scratch_load_b32 v147, off, s33 offset:368
; DAGISEL-NEXT: scratch_load_b32 v148, off, s33 offset:372
; DAGISEL-NEXT: scratch_load_b32 v149, off, s33 offset:376
; DAGISEL-NEXT: scratch_load_b32 v150, off, s33 offset:380
; DAGISEL-NEXT: scratch_load_b32 v151, off, s33 offset:384
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v160, off, s33 offset:388
; DAGISEL-NEXT: scratch_load_b32 v161, off, s33 offset:392
; DAGISEL-NEXT: scratch_load_b32 v162, off, s33 offset:396
; DAGISEL-NEXT: scratch_load_b32 v163, off, s33 offset:400
; DAGISEL-NEXT: scratch_load_b32 v164, off, s33 offset:404
; DAGISEL-NEXT: scratch_load_b32 v165, off, s33 offset:408
; DAGISEL-NEXT: scratch_load_b32 v166, off, s33 offset:412
; DAGISEL-NEXT: scratch_load_b32 v167, off, s33 offset:416
; DAGISEL-NEXT: scratch_load_b32 v176, off, s33 offset:420
; DAGISEL-NEXT: scratch_load_b32 v177, off, s33 offset:424
; DAGISEL-NEXT: scratch_load_b32 v178, off, s33 offset:428
; DAGISEL-NEXT: scratch_load_b32 v179, off, s33 offset:432
; DAGISEL-NEXT: scratch_load_b32 v180, off, s33 offset:436
; DAGISEL-NEXT: scratch_load_b32 v181, off, s33 offset:440
; DAGISEL-NEXT: scratch_load_b32 v182, off, s33 offset:444
; DAGISEL-NEXT: scratch_load_b32 v183, off, s33 offset:448
; DAGISEL-NEXT: scratch_load_b32 v192, off, s33 offset:452
; DAGISEL-NEXT: scratch_load_b32 v193, off, s33 offset:456
; DAGISEL-NEXT: scratch_load_b32 v194, off, s33 offset:460
; DAGISEL-NEXT: scratch_load_b32 v195, off, s33 offset:464
; DAGISEL-NEXT: scratch_load_b32 v196, off, s33 offset:468
; DAGISEL-NEXT: scratch_load_b32 v197, off, s33 offset:472
; DAGISEL-NEXT: scratch_load_b32 v198, off, s33 offset:476
; DAGISEL-NEXT: scratch_load_b32 v199, off, s33 offset:480
; DAGISEL-NEXT: scratch_load_b32 v208, off, s33 offset:484
; DAGISEL-NEXT: scratch_load_b32 v209, off, s33 offset:488
; DAGISEL-NEXT: scratch_load_b32 v210, off, s33 offset:492
; DAGISEL-NEXT: scratch_load_b32 v211, off, s33 offset:496
; DAGISEL-NEXT: scratch_load_b32 v212, off, s33 offset:500
; DAGISEL-NEXT: scratch_load_b32 v213, off, s33 offset:504
; DAGISEL-NEXT: scratch_load_b32 v214, off, s33 offset:508
; DAGISEL-NEXT: scratch_load_b32 v215, off, s33 offset:512
; DAGISEL-NEXT: s_clause 0xf
; DAGISEL-NEXT: scratch_load_b32 v224, off, s33 offset:516
; DAGISEL-NEXT: scratch_load_b32 v225, off, s33 offset:520
; DAGISEL-NEXT: scratch_load_b32 v226, off, s33 offset:524
; DAGISEL-NEXT: scratch_load_b32 v227, off, s33 offset:528
; DAGISEL-NEXT: scratch_load_b32 v228, off, s33 offset:532
; DAGISEL-NEXT: scratch_load_b32 v229, off, s33 offset:536
; DAGISEL-NEXT: scratch_load_b32 v230, off, s33 offset:540
; DAGISEL-NEXT: scratch_load_b32 v231, off, s33 offset:544
; DAGISEL-NEXT: scratch_load_b32 v240, off, s33 offset:548
; DAGISEL-NEXT: scratch_load_b32 v241, off, s33 offset:552
; DAGISEL-NEXT: scratch_load_b32 v242, off, s33 offset:556
; DAGISEL-NEXT: scratch_load_b32 v243, off, s33 offset:560
; DAGISEL-NEXT: scratch_load_b32 v244, off, s33 offset:564
; DAGISEL-NEXT: scratch_load_b32 v245, off, s33 offset:568
; DAGISEL-NEXT: scratch_load_b32 v246, off, s33 offset:572
; DAGISEL-NEXT: scratch_load_b32 v247, off, s33 offset:576
; DAGISEL-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-NEXT: s_mov_b32 s33, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: call_gfx_from_whole_wave:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_mov_b32 s0, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_xor_saveexec_b32 s4, -1
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4
; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8
; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12
; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16
; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20
; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24
; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28
; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32
; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36
; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40
; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44
; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48
; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52
; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56
; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60
; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64
; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68
; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72
; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76
; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80
; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84
; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88
; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92
; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96
; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100
; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104
; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108
; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112
; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116
; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120
; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124
; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132
; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136
; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140
; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144
; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148
; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152
; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156
; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160
; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164
; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168
; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172
; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176
; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180
; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184
; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188
; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192
; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196
; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200
; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204
; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208
; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212
; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216
; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220
; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224
; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228
; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232
; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236
; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240
; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244
; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248
; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252
; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260
; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264
; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268
; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272
; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276
; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280
; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284
; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288
; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292
; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296
; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300
; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304
; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308
; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312
; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316
; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320
; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324
; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328
; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332
; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336
; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340
; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344
; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348
; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352
; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356
; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360
; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364
; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368
; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372
; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376
; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380
; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388
; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392
; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396
; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400
; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404
; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408
; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412
; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416
; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420
; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424
; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428
; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432
; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436
; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440
; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444
; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448
; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452
; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456
; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460
; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464
; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468
; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472
; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476
; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480
; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484
; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488
; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492
; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496
; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500
; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504
; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508
; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512
; GISEL-NEXT: s_clause 0xf
; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516
; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520
; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524
; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528
; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532
; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536
; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540
; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544
; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548
; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552
; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556
; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560
; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564
; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568
; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572
; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_writelane_b32 v40, s0, 3
; GISEL-NEXT: v_mov_b32_e32 v2, v0
; GISEL-NEXT: v_swap_b32 v0, v1
; GISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo
; GISEL-NEXT: v_writelane_b32 v40, s4, 0
; GISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi
; GISEL-NEXT: s_addk_co_i32 s32, 0x250
; GISEL-NEXT: v_writelane_b32 v40, s30, 1
; GISEL-NEXT: v_writelane_b32 v40, s31, 2
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_readlane_b32 s31, v40, 2
; GISEL-NEXT: v_readlane_b32 s30, v40, 1
; GISEL-NEXT: v_readlane_b32 s4, v40, 0
; GISEL-NEXT: v_readlane_b32 s0, v40, 3
; GISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_xor_b32 exec_lo, s4, -1
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4
; GISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8
; GISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12
; GISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16
; GISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20
; GISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24
; GISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28
; GISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32
; GISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36
; GISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40
; GISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44
; GISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48
; GISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52
; GISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56
; GISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60
; GISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64
; GISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68
; GISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72
; GISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76
; GISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80
; GISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84
; GISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88
; GISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92
; GISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96
; GISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100
; GISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104
; GISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108
; GISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112
; GISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116
; GISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120
; GISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124
; GISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132
; GISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136
; GISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140
; GISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144
; GISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148
; GISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152
; GISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156
; GISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160
; GISEL-NEXT: scratch_load_b32 v48, off, s33 offset:164
; GISEL-NEXT: scratch_load_b32 v49, off, s33 offset:168
; GISEL-NEXT: scratch_load_b32 v50, off, s33 offset:172
; GISEL-NEXT: scratch_load_b32 v51, off, s33 offset:176
; GISEL-NEXT: scratch_load_b32 v52, off, s33 offset:180
; GISEL-NEXT: scratch_load_b32 v53, off, s33 offset:184
; GISEL-NEXT: scratch_load_b32 v54, off, s33 offset:188
; GISEL-NEXT: scratch_load_b32 v55, off, s33 offset:192
; GISEL-NEXT: scratch_load_b32 v64, off, s33 offset:196
; GISEL-NEXT: scratch_load_b32 v65, off, s33 offset:200
; GISEL-NEXT: scratch_load_b32 v66, off, s33 offset:204
; GISEL-NEXT: scratch_load_b32 v67, off, s33 offset:208
; GISEL-NEXT: scratch_load_b32 v68, off, s33 offset:212
; GISEL-NEXT: scratch_load_b32 v69, off, s33 offset:216
; GISEL-NEXT: scratch_load_b32 v70, off, s33 offset:220
; GISEL-NEXT: scratch_load_b32 v71, off, s33 offset:224
; GISEL-NEXT: scratch_load_b32 v80, off, s33 offset:228
; GISEL-NEXT: scratch_load_b32 v81, off, s33 offset:232
; GISEL-NEXT: scratch_load_b32 v82, off, s33 offset:236
; GISEL-NEXT: scratch_load_b32 v83, off, s33 offset:240
; GISEL-NEXT: scratch_load_b32 v84, off, s33 offset:244
; GISEL-NEXT: scratch_load_b32 v85, off, s33 offset:248
; GISEL-NEXT: scratch_load_b32 v86, off, s33 offset:252
; GISEL-NEXT: scratch_load_b32 v87, off, s33 offset:256
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v96, off, s33 offset:260
; GISEL-NEXT: scratch_load_b32 v97, off, s33 offset:264
; GISEL-NEXT: scratch_load_b32 v98, off, s33 offset:268
; GISEL-NEXT: scratch_load_b32 v99, off, s33 offset:272
; GISEL-NEXT: scratch_load_b32 v100, off, s33 offset:276
; GISEL-NEXT: scratch_load_b32 v101, off, s33 offset:280
; GISEL-NEXT: scratch_load_b32 v102, off, s33 offset:284
; GISEL-NEXT: scratch_load_b32 v103, off, s33 offset:288
; GISEL-NEXT: scratch_load_b32 v112, off, s33 offset:292
; GISEL-NEXT: scratch_load_b32 v113, off, s33 offset:296
; GISEL-NEXT: scratch_load_b32 v114, off, s33 offset:300
; GISEL-NEXT: scratch_load_b32 v115, off, s33 offset:304
; GISEL-NEXT: scratch_load_b32 v116, off, s33 offset:308
; GISEL-NEXT: scratch_load_b32 v117, off, s33 offset:312
; GISEL-NEXT: scratch_load_b32 v118, off, s33 offset:316
; GISEL-NEXT: scratch_load_b32 v119, off, s33 offset:320
; GISEL-NEXT: scratch_load_b32 v128, off, s33 offset:324
; GISEL-NEXT: scratch_load_b32 v129, off, s33 offset:328
; GISEL-NEXT: scratch_load_b32 v130, off, s33 offset:332
; GISEL-NEXT: scratch_load_b32 v131, off, s33 offset:336
; GISEL-NEXT: scratch_load_b32 v132, off, s33 offset:340
; GISEL-NEXT: scratch_load_b32 v133, off, s33 offset:344
; GISEL-NEXT: scratch_load_b32 v134, off, s33 offset:348
; GISEL-NEXT: scratch_load_b32 v135, off, s33 offset:352
; GISEL-NEXT: scratch_load_b32 v144, off, s33 offset:356
; GISEL-NEXT: scratch_load_b32 v145, off, s33 offset:360
; GISEL-NEXT: scratch_load_b32 v146, off, s33 offset:364
; GISEL-NEXT: scratch_load_b32 v147, off, s33 offset:368
; GISEL-NEXT: scratch_load_b32 v148, off, s33 offset:372
; GISEL-NEXT: scratch_load_b32 v149, off, s33 offset:376
; GISEL-NEXT: scratch_load_b32 v150, off, s33 offset:380
; GISEL-NEXT: scratch_load_b32 v151, off, s33 offset:384
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v160, off, s33 offset:388
; GISEL-NEXT: scratch_load_b32 v161, off, s33 offset:392
; GISEL-NEXT: scratch_load_b32 v162, off, s33 offset:396
; GISEL-NEXT: scratch_load_b32 v163, off, s33 offset:400
; GISEL-NEXT: scratch_load_b32 v164, off, s33 offset:404
; GISEL-NEXT: scratch_load_b32 v165, off, s33 offset:408
; GISEL-NEXT: scratch_load_b32 v166, off, s33 offset:412
; GISEL-NEXT: scratch_load_b32 v167, off, s33 offset:416
; GISEL-NEXT: scratch_load_b32 v176, off, s33 offset:420
; GISEL-NEXT: scratch_load_b32 v177, off, s33 offset:424
; GISEL-NEXT: scratch_load_b32 v178, off, s33 offset:428
; GISEL-NEXT: scratch_load_b32 v179, off, s33 offset:432
; GISEL-NEXT: scratch_load_b32 v180, off, s33 offset:436
; GISEL-NEXT: scratch_load_b32 v181, off, s33 offset:440
; GISEL-NEXT: scratch_load_b32 v182, off, s33 offset:444
; GISEL-NEXT: scratch_load_b32 v183, off, s33 offset:448
; GISEL-NEXT: scratch_load_b32 v192, off, s33 offset:452
; GISEL-NEXT: scratch_load_b32 v193, off, s33 offset:456
; GISEL-NEXT: scratch_load_b32 v194, off, s33 offset:460
; GISEL-NEXT: scratch_load_b32 v195, off, s33 offset:464
; GISEL-NEXT: scratch_load_b32 v196, off, s33 offset:468
; GISEL-NEXT: scratch_load_b32 v197, off, s33 offset:472
; GISEL-NEXT: scratch_load_b32 v198, off, s33 offset:476
; GISEL-NEXT: scratch_load_b32 v199, off, s33 offset:480
; GISEL-NEXT: scratch_load_b32 v208, off, s33 offset:484
; GISEL-NEXT: scratch_load_b32 v209, off, s33 offset:488
; GISEL-NEXT: scratch_load_b32 v210, off, s33 offset:492
; GISEL-NEXT: scratch_load_b32 v211, off, s33 offset:496
; GISEL-NEXT: scratch_load_b32 v212, off, s33 offset:500
; GISEL-NEXT: scratch_load_b32 v213, off, s33 offset:504
; GISEL-NEXT: scratch_load_b32 v214, off, s33 offset:508
; GISEL-NEXT: scratch_load_b32 v215, off, s33 offset:512
; GISEL-NEXT: s_clause 0xf
; GISEL-NEXT: scratch_load_b32 v224, off, s33 offset:516
; GISEL-NEXT: scratch_load_b32 v225, off, s33 offset:520
; GISEL-NEXT: scratch_load_b32 v226, off, s33 offset:524
; GISEL-NEXT: scratch_load_b32 v227, off, s33 offset:528
; GISEL-NEXT: scratch_load_b32 v228, off, s33 offset:532
; GISEL-NEXT: scratch_load_b32 v229, off, s33 offset:536
; GISEL-NEXT: scratch_load_b32 v230, off, s33 offset:540
; GISEL-NEXT: scratch_load_b32 v231, off, s33 offset:544
; GISEL-NEXT: scratch_load_b32 v240, off, s33 offset:548
; GISEL-NEXT: scratch_load_b32 v241, off, s33 offset:552
; GISEL-NEXT: scratch_load_b32 v242, off, s33 offset:556
; GISEL-NEXT: scratch_load_b32 v243, off, s33 offset:560
; GISEL-NEXT: scratch_load_b32 v244, off, s33 offset:564
; GISEL-NEXT: scratch_load_b32 v245, off, s33 offset:568
; GISEL-NEXT: scratch_load_b32 v246, off, s33 offset:572
; GISEL-NEXT: scratch_load_b32 v247, off, s33 offset:576
; GISEL-NEXT: s_mov_b32 exec_lo, s4
; GISEL-NEXT: s_mov_b32 s33, s0
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: call_gfx_from_whole_wave:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_mov_b32 s0, s33
; DAGISEL64-NEXT: s_mov_b32 s33, s32
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12
; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16
; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20
; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24
; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28
; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32
; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36
; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40
; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44
; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48
; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52
; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56
; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60
; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64
; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68
; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72
; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76
; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80
; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84
; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88
; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92
; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96
; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100
; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104
; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108
; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112
; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116
; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120
; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124
; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132
; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136
; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140
; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144
; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148
; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152
; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156
; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160
; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164
; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168
; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172
; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176
; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180
; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184
; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188
; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192
; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196
; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200
; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204
; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208
; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212
; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216
; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220
; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224
; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228
; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232
; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236
; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240
; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244
; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248
; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252
; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260
; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264
; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268
; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272
; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276
; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280
; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284
; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288
; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292
; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296
; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300
; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304
; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308
; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312
; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316
; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320
; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324
; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328
; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332
; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336
; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340
; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344
; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348
; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352
; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356
; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360
; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364
; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368
; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372
; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376
; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380
; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388
; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392
; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396
; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400
; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404
; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408
; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412
; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416
; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420
; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424
; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428
; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432
; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436
; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440
; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444
; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448
; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452
; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456
; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460
; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464
; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468
; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472
; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476
; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480
; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484
; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488
; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492
; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496
; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500
; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504
; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508
; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512
; DAGISEL64-NEXT: s_clause 0xf
; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516
; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520
; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524
; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528
; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532
; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536
; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540
; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544
; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548
; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552
; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556
; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560
; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564
; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568
; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572
; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_writelane_b32 v40, s0, 4
; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0
; DAGISEL64-NEXT: v_swap_b32 v0, v1
; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi
; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0
; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo
; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250
; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1
; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2
; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3
; DAGISEL64-NEXT: v_readlane_b32 s30, v40, 2
; DAGISEL64-NEXT: v_readlane_b32 s5, v40, 1
; DAGISEL64-NEXT: v_readlane_b32 s4, v40, 0
; DAGISEL64-NEXT: v_readlane_b32 s0, v40, 4
; DAGISEL64-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_mov_b32 s32, s33
; DAGISEL64-NEXT: s_xor_b64 exec, s[4:5], -1
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12
; DAGISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16
; DAGISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20
; DAGISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24
; DAGISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28
; DAGISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32
; DAGISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36
; DAGISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40
; DAGISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44
; DAGISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48
; DAGISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52
; DAGISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56
; DAGISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60
; DAGISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64
; DAGISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68
; DAGISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72
; DAGISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76
; DAGISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80
; DAGISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84
; DAGISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88
; DAGISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92
; DAGISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96
; DAGISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100
; DAGISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104
; DAGISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108
; DAGISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112
; DAGISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116
; DAGISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120
; DAGISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124
; DAGISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132
; DAGISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136
; DAGISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140
; DAGISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144
; DAGISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148
; DAGISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152
; DAGISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156
; DAGISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160
; DAGISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:164
; DAGISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:168
; DAGISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:172
; DAGISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:176
; DAGISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:180
; DAGISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:184
; DAGISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:188
; DAGISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:192
; DAGISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:196
; DAGISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:200
; DAGISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:204
; DAGISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:208
; DAGISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:212
; DAGISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:216
; DAGISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:220
; DAGISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:224
; DAGISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:228
; DAGISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:232
; DAGISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:236
; DAGISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:240
; DAGISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:244
; DAGISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:248
; DAGISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:252
; DAGISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:256
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:260
; DAGISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:264
; DAGISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:268
; DAGISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:272
; DAGISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:276
; DAGISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:280
; DAGISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:284
; DAGISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:288
; DAGISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:292
; DAGISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:296
; DAGISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:300
; DAGISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:304
; DAGISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:308
; DAGISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:312
; DAGISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:316
; DAGISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:320
; DAGISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:324
; DAGISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:328
; DAGISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:332
; DAGISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:336
; DAGISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:340
; DAGISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:344
; DAGISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:348
; DAGISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:352
; DAGISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:356
; DAGISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:360
; DAGISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:364
; DAGISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:368
; DAGISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:372
; DAGISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:376
; DAGISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:380
; DAGISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:384
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:388
; DAGISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:392
; DAGISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:396
; DAGISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:400
; DAGISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:404
; DAGISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:408
; DAGISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:412
; DAGISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:416
; DAGISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:420
; DAGISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:424
; DAGISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:428
; DAGISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:432
; DAGISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:436
; DAGISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:440
; DAGISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:444
; DAGISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:448
; DAGISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:452
; DAGISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:456
; DAGISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:460
; DAGISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:464
; DAGISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:468
; DAGISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:472
; DAGISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:476
; DAGISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:480
; DAGISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:484
; DAGISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:488
; DAGISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:492
; DAGISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:496
; DAGISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:500
; DAGISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:504
; DAGISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:508
; DAGISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:512
; DAGISEL64-NEXT: s_clause 0xf
; DAGISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:516
; DAGISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:520
; DAGISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:524
; DAGISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:528
; DAGISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:532
; DAGISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:536
; DAGISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:540
; DAGISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:544
; DAGISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:548
; DAGISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:552
; DAGISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:556
; DAGISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:560
; DAGISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:564
; DAGISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:568
; DAGISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:572
; DAGISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:576
; DAGISEL64-NEXT: s_mov_b64 exec, s[4:5]
; DAGISEL64-NEXT: s_mov_b32 s33, s0
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: call_gfx_from_whole_wave:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_mov_b32 s0, s33
; GISEL64-NEXT: s_mov_b32 s33, s32
; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12
; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16
; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20
; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24
; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28
; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32
; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36
; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40
; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44
; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48
; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52
; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56
; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60
; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64
; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68
; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72
; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76
; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80
; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84
; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88
; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92
; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96
; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100
; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104
; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108
; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112
; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116
; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120
; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124
; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132
; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136
; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140
; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144
; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148
; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152
; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156
; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160
; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164
; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168
; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172
; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176
; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180
; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184
; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188
; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192
; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196
; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200
; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204
; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208
; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212
; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216
; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220
; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224
; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228
; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232
; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236
; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240
; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244
; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248
; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252
; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260
; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264
; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268
; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272
; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276
; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280
; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284
; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288
; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292
; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296
; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300
; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304
; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308
; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312
; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316
; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320
; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324
; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328
; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332
; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336
; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340
; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344
; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348
; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352
; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356
; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360
; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364
; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368
; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372
; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376
; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380
; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388
; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392
; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396
; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400
; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404
; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408
; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412
; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416
; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420
; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424
; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428
; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432
; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436
; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440
; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444
; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448
; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452
; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456
; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460
; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464
; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468
; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472
; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476
; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480
; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484
; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488
; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492
; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496
; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500
; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504
; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508
; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512
; GISEL64-NEXT: s_clause 0xf
; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516
; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520
; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524
; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528
; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532
; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536
; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540
; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544
; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548
; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552
; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556
; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560
; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564
; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568
; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572
; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_writelane_b32 v40, s0, 4
; GISEL64-NEXT: v_mov_b32_e32 v2, v0
; GISEL64-NEXT: v_swap_b32 v0, v1
; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo
; GISEL64-NEXT: v_writelane_b32 v40, s4, 0
; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi
; GISEL64-NEXT: s_addk_co_i32 s32, 0x250
; GISEL64-NEXT: v_writelane_b32 v40, s5, 1
; GISEL64-NEXT: v_writelane_b32 v40, s30, 2
; GISEL64-NEXT: v_writelane_b32 v40, s31, 3
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_readlane_b32 s31, v40, 3
; GISEL64-NEXT: v_readlane_b32 s30, v40, 2
; GISEL64-NEXT: v_readlane_b32 s5, v40, 1
; GISEL64-NEXT: v_readlane_b32 s4, v40, 0
; GISEL64-NEXT: v_readlane_b32 s0, v40, 4
; GISEL64-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GISEL64-NEXT: s_mov_b32 s32, s33
; GISEL64-NEXT: s_xor_b64 exec, s[4:5], -1
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4
; GISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8
; GISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12
; GISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16
; GISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20
; GISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24
; GISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28
; GISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32
; GISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36
; GISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40
; GISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44
; GISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48
; GISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52
; GISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56
; GISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60
; GISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64
; GISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68
; GISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72
; GISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76
; GISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80
; GISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84
; GISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88
; GISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92
; GISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96
; GISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100
; GISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104
; GISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108
; GISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112
; GISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116
; GISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120
; GISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124
; GISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132
; GISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136
; GISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140
; GISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144
; GISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148
; GISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152
; GISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156
; GISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160
; GISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:164
; GISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:168
; GISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:172
; GISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:176
; GISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:180
; GISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:184
; GISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:188
; GISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:192
; GISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:196
; GISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:200
; GISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:204
; GISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:208
; GISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:212
; GISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:216
; GISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:220
; GISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:224
; GISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:228
; GISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:232
; GISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:236
; GISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:240
; GISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:244
; GISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:248
; GISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:252
; GISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:256
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:260
; GISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:264
; GISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:268
; GISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:272
; GISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:276
; GISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:280
; GISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:284
; GISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:288
; GISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:292
; GISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:296
; GISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:300
; GISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:304
; GISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:308
; GISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:312
; GISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:316
; GISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:320
; GISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:324
; GISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:328
; GISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:332
; GISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:336
; GISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:340
; GISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:344
; GISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:348
; GISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:352
; GISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:356
; GISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:360
; GISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:364
; GISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:368
; GISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:372
; GISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:376
; GISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:380
; GISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:384
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:388
; GISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:392
; GISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:396
; GISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:400
; GISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:404
; GISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:408
; GISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:412
; GISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:416
; GISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:420
; GISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:424
; GISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:428
; GISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:432
; GISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:436
; GISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:440
; GISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:444
; GISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:448
; GISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:452
; GISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:456
; GISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:460
; GISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:464
; GISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:468
; GISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:472
; GISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:476
; GISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:480
; GISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:484
; GISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:488
; GISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:492
; GISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:496
; GISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:500
; GISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:504
; GISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:508
; GISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:512
; GISEL64-NEXT: s_clause 0xf
; GISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:516
; GISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:520
; GISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:524
; GISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:528
; GISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:532
; GISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:536
; GISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:540
; GISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:544
; GISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:548
; GISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:552
; GISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:556
; GISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:560
; GISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:564
; GISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:568
; GISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:572
; GISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:576
; GISEL64-NEXT: s_mov_b64 exec, s[4:5]
; GISEL64-NEXT: s_mov_b32 s33, s0
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_setpc_b64 s[30:31]
%ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
ret <2 x half> %ret
}
; External function using the amdgpu_gfx_whole_wave calling convention. Its
; first parameter is the i1 %active value; the <8 x float> is the only ordinary
; argument. It is only declared here and is the call target used by
; call_from_entry via llvm.amdgcn.call.whole.wave.
declare amdgpu_gfx_whole_wave float @callee(i1 %active, <8 x float> %x)
; Calls the whole wave function @callee from an amdgpu_cs entry point through
; the llvm.amdgcn.call.whole.wave intrinsic and stores the returned float
; through %p.
;
; What the expected output below shows, for both instruction selectors and both
; wave sizes:
;   * s32 is set to 0 before the call.
;   * The callee address is materialised in s[0:1] from callee@abs32@lo/@hi and
;     the call is an s_swappc_b64 through that pair.
;   * v8/v9 are copied into v40/v41 before the call, and the result in v0 is
;     stored through v[40:41] afterwards (v8/v9 presumably hold %p - the store
;     address is the only other use visible here).
;   * No instruction in the sequence reads or writes EXEC at the call site.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_cs void @call_from_entry(<8 x float> %x, ptr %p) {
; DAGISEL-LABEL: call_from_entry:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
; DAGISEL-NEXT: s_mov_b32 s32, 0
; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-NEXT: flat_store_b32 v[40:41], v0
; DAGISEL-NEXT: s_endpgm
;
; GISEL-LABEL: call_from_entry:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-NEXT: flat_store_b32 v[40:41], v0
; GISEL-NEXT: s_endpgm
;
; DAGISEL64-LABEL: call_from_entry:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi
; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo
; DAGISEL64-NEXT: s_mov_b32 s32, 0
; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9
; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8
; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0
; DAGISEL64-NEXT: s_endpgm
;
; GISEL64-LABEL: call_from_entry:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo
; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi
; GISEL64-NEXT: s_mov_b32 s32, 0
; GISEL64-NEXT: v_mov_b32_e32 v40, v8
; GISEL64-NEXT: v_mov_b32_e32 v41, v9
; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL64-NEXT: flat_store_b32 v[40:41], v0
; GISEL64-NEXT: s_endpgm
; The intrinsic takes the callee first, then the callee's arguments without its
; leading i1: only %x is forwarded to @callee(i1, <8 x float>).
%ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent
store float %ret, ptr %p
ret void
}
define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> %x, ptr %p) {
; DAGISEL-LABEL: call_from_whole_wave:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_mov_b32 s0, s33
; DAGISEL-NEXT: s_mov_b32 s33, s32
; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12
; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16
; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20
; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24
; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28
; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32
; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36
; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40
; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44
; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48
; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52
; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56
; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60
; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64
; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68
; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72
; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76
; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80
; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84
; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88
; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92
; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96
; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100
; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104
; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108
; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112
; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116
; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120
; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124
; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132
; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136
; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140
; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144
; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148
; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152
; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156
; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160
; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172
; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176
; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180
; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184
; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188
; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192
; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196
; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200
; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204
; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208
; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212
; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216
; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220
; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224
; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228
; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232
; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236
; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240
; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244
; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248
; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252
; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256
; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260
; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268
; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272
; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276
; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280
; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284
; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288
; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292
; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296
; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300
; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304
; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308
; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312
; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316
; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320
; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324
; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328
; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332
; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336
; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340
; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344
; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348
; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352
; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356
; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360
; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364
; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368
; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372
; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376
; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380
; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384
; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388
; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396
; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400
; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404
; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408
; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412
; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416
; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420
; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424
; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428
; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432
; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436
; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440
; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444
; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448
; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452
; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456
; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460
; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464
; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468
; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472
; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476
; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480
; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484
; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488
; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492
; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496
; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500
; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504
; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508
; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512
; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516
; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520
; DAGISEL-NEXT: s_clause 0xf
; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524
; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528
; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532
; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536
; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540
; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544
; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548
; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552
; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556
; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560
; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564
; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568
; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572
; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576
; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580
; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_clause 0x2
; DAGISEL-NEXT: scratch_store_b32 off, v42, s33
; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164
; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_writelane_b32 v42, s0, 3
; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250
; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8
; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0
; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1
; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-NEXT: flat_store_b32 v[40:41], v0
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2
; DAGISEL-NEXT: v_readlane_b32 s30, v42, 1
; DAGISEL-NEXT: v_readlane_b32 s4, v42, 0
; DAGISEL-NEXT: v_readlane_b32 s0, v42, 3
; DAGISEL-NEXT: s_clause 0x2
; DAGISEL-NEXT: scratch_load_b32 v42, off, s33
; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:164
; DAGISEL-NEXT: scratch_load_b32 v41, off, s33 offset:168
; DAGISEL-NEXT: s_mov_b32 s32, s33
; DAGISEL-NEXT: s_xor_b32 exec_lo, s4, -1
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4
; DAGISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8
; DAGISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12
; DAGISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16
; DAGISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20
; DAGISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24
; DAGISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28
; DAGISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32
; DAGISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36
; DAGISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40
; DAGISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44
; DAGISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48
; DAGISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52
; DAGISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56
; DAGISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60
; DAGISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64
; DAGISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68
; DAGISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72
; DAGISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76
; DAGISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80
; DAGISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84
; DAGISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88
; DAGISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92
; DAGISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96
; DAGISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100
; DAGISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104
; DAGISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108
; DAGISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112
; DAGISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116
; DAGISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120
; DAGISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124
; DAGISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132
; DAGISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136
; DAGISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140
; DAGISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144
; DAGISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148
; DAGISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152
; DAGISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156
; DAGISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160
; DAGISEL-NEXT: scratch_load_b32 v48, off, s33 offset:172
; DAGISEL-NEXT: scratch_load_b32 v49, off, s33 offset:176
; DAGISEL-NEXT: scratch_load_b32 v50, off, s33 offset:180
; DAGISEL-NEXT: scratch_load_b32 v51, off, s33 offset:184
; DAGISEL-NEXT: scratch_load_b32 v52, off, s33 offset:188
; DAGISEL-NEXT: scratch_load_b32 v53, off, s33 offset:192
; DAGISEL-NEXT: scratch_load_b32 v54, off, s33 offset:196
; DAGISEL-NEXT: scratch_load_b32 v55, off, s33 offset:200
; DAGISEL-NEXT: scratch_load_b32 v64, off, s33 offset:204
; DAGISEL-NEXT: scratch_load_b32 v65, off, s33 offset:208
; DAGISEL-NEXT: scratch_load_b32 v66, off, s33 offset:212
; DAGISEL-NEXT: scratch_load_b32 v67, off, s33 offset:216
; DAGISEL-NEXT: scratch_load_b32 v68, off, s33 offset:220
; DAGISEL-NEXT: scratch_load_b32 v69, off, s33 offset:224
; DAGISEL-NEXT: scratch_load_b32 v70, off, s33 offset:228
; DAGISEL-NEXT: scratch_load_b32 v71, off, s33 offset:232
; DAGISEL-NEXT: scratch_load_b32 v80, off, s33 offset:236
; DAGISEL-NEXT: scratch_load_b32 v81, off, s33 offset:240
; DAGISEL-NEXT: scratch_load_b32 v82, off, s33 offset:244
; DAGISEL-NEXT: scratch_load_b32 v83, off, s33 offset:248
; DAGISEL-NEXT: scratch_load_b32 v84, off, s33 offset:252
; DAGISEL-NEXT: scratch_load_b32 v85, off, s33 offset:256
; DAGISEL-NEXT: scratch_load_b32 v86, off, s33 offset:260
; DAGISEL-NEXT: scratch_load_b32 v87, off, s33 offset:264
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v96, off, s33 offset:268
; DAGISEL-NEXT: scratch_load_b32 v97, off, s33 offset:272
; DAGISEL-NEXT: scratch_load_b32 v98, off, s33 offset:276
; DAGISEL-NEXT: scratch_load_b32 v99, off, s33 offset:280
; DAGISEL-NEXT: scratch_load_b32 v100, off, s33 offset:284
; DAGISEL-NEXT: scratch_load_b32 v101, off, s33 offset:288
; DAGISEL-NEXT: scratch_load_b32 v102, off, s33 offset:292
; DAGISEL-NEXT: scratch_load_b32 v103, off, s33 offset:296
; DAGISEL-NEXT: scratch_load_b32 v112, off, s33 offset:300
; DAGISEL-NEXT: scratch_load_b32 v113, off, s33 offset:304
; DAGISEL-NEXT: scratch_load_b32 v114, off, s33 offset:308
; DAGISEL-NEXT: scratch_load_b32 v115, off, s33 offset:312
; DAGISEL-NEXT: scratch_load_b32 v116, off, s33 offset:316
; DAGISEL-NEXT: scratch_load_b32 v117, off, s33 offset:320
; DAGISEL-NEXT: scratch_load_b32 v118, off, s33 offset:324
; DAGISEL-NEXT: scratch_load_b32 v119, off, s33 offset:328
; DAGISEL-NEXT: scratch_load_b32 v128, off, s33 offset:332
; DAGISEL-NEXT: scratch_load_b32 v129, off, s33 offset:336
; DAGISEL-NEXT: scratch_load_b32 v130, off, s33 offset:340
; DAGISEL-NEXT: scratch_load_b32 v131, off, s33 offset:344
; DAGISEL-NEXT: scratch_load_b32 v132, off, s33 offset:348
; DAGISEL-NEXT: scratch_load_b32 v133, off, s33 offset:352
; DAGISEL-NEXT: scratch_load_b32 v134, off, s33 offset:356
; DAGISEL-NEXT: scratch_load_b32 v135, off, s33 offset:360
; DAGISEL-NEXT: scratch_load_b32 v144, off, s33 offset:364
; DAGISEL-NEXT: scratch_load_b32 v145, off, s33 offset:368
; DAGISEL-NEXT: scratch_load_b32 v146, off, s33 offset:372
; DAGISEL-NEXT: scratch_load_b32 v147, off, s33 offset:376
; DAGISEL-NEXT: scratch_load_b32 v148, off, s33 offset:380
; DAGISEL-NEXT: scratch_load_b32 v149, off, s33 offset:384
; DAGISEL-NEXT: scratch_load_b32 v150, off, s33 offset:388
; DAGISEL-NEXT: scratch_load_b32 v151, off, s33 offset:392
; DAGISEL-NEXT: s_clause 0x1f
; DAGISEL-NEXT: scratch_load_b32 v160, off, s33 offset:396
; DAGISEL-NEXT: scratch_load_b32 v161, off, s33 offset:400
; DAGISEL-NEXT: scratch_load_b32 v162, off, s33 offset:404
; DAGISEL-NEXT: scratch_load_b32 v163, off, s33 offset:408
; DAGISEL-NEXT: scratch_load_b32 v164, off, s33 offset:412
; DAGISEL-NEXT: scratch_load_b32 v165, off, s33 offset:416
; DAGISEL-NEXT: scratch_load_b32 v166, off, s33 offset:420
; DAGISEL-NEXT: scratch_load_b32 v167, off, s33 offset:424
; DAGISEL-NEXT: scratch_load_b32 v176, off, s33 offset:428
; DAGISEL-NEXT: scratch_load_b32 v177, off, s33 offset:432
; DAGISEL-NEXT: scratch_load_b32 v178, off, s33 offset:436
; DAGISEL-NEXT: scratch_load_b32 v179, off, s33 offset:440
; DAGISEL-NEXT: scratch_load_b32 v180, off, s33 offset:444
; DAGISEL-NEXT: scratch_load_b32 v181, off, s33 offset:448
; DAGISEL-NEXT: scratch_load_b32 v182, off, s33 offset:452
; DAGISEL-NEXT: scratch_load_b32 v183, off, s33 offset:456
; DAGISEL-NEXT: scratch_load_b32 v192, off, s33 offset:460
; DAGISEL-NEXT: scratch_load_b32 v193, off, s33 offset:464
; DAGISEL-NEXT: scratch_load_b32 v194, off, s33 offset:468
; DAGISEL-NEXT: scratch_load_b32 v195, off, s33 offset:472
; DAGISEL-NEXT: scratch_load_b32 v196, off, s33 offset:476
; DAGISEL-NEXT: scratch_load_b32 v197, off, s33 offset:480
; DAGISEL-NEXT: scratch_load_b32 v198, off, s33 offset:484
; DAGISEL-NEXT: scratch_load_b32 v199, off, s33 offset:488
; DAGISEL-NEXT: scratch_load_b32 v208, off, s33 offset:492
; DAGISEL-NEXT: scratch_load_b32 v209, off, s33 offset:496
; DAGISEL-NEXT: scratch_load_b32 v210, off, s33 offset:500
; DAGISEL-NEXT: scratch_load_b32 v211, off, s33 offset:504
; DAGISEL-NEXT: scratch_load_b32 v212, off, s33 offset:508
; DAGISEL-NEXT: scratch_load_b32 v213, off, s33 offset:512
; DAGISEL-NEXT: scratch_load_b32 v214, off, s33 offset:516
; DAGISEL-NEXT: scratch_load_b32 v215, off, s33 offset:520
; DAGISEL-NEXT: s_clause 0xf
; DAGISEL-NEXT: scratch_load_b32 v224, off, s33 offset:524
; DAGISEL-NEXT: scratch_load_b32 v225, off, s33 offset:528
; DAGISEL-NEXT: scratch_load_b32 v226, off, s33 offset:532
; DAGISEL-NEXT: scratch_load_b32 v227, off, s33 offset:536
; DAGISEL-NEXT: scratch_load_b32 v228, off, s33 offset:540
; DAGISEL-NEXT: scratch_load_b32 v229, off, s33 offset:544
; DAGISEL-NEXT: scratch_load_b32 v230, off, s33 offset:548
; DAGISEL-NEXT: scratch_load_b32 v231, off, s33 offset:552
; DAGISEL-NEXT: scratch_load_b32 v240, off, s33 offset:556
; DAGISEL-NEXT: scratch_load_b32 v241, off, s33 offset:560
; DAGISEL-NEXT: scratch_load_b32 v242, off, s33 offset:564
; DAGISEL-NEXT: scratch_load_b32 v243, off, s33 offset:568
; DAGISEL-NEXT: scratch_load_b32 v244, off, s33 offset:572
; DAGISEL-NEXT: scratch_load_b32 v245, off, s33 offset:576
; DAGISEL-NEXT: scratch_load_b32 v246, off, s33 offset:580
; DAGISEL-NEXT: scratch_load_b32 v247, off, s33 offset:584
; DAGISEL-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-NEXT: s_mov_b32 s33, s0
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: call_from_whole_wave:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_mov_b32 s0, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_xor_saveexec_b32 s4, -1
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4
; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8
; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12
; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16
; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20
; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24
; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28
; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32
; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36
; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40
; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44
; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48
; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52
; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56
; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60
; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64
; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68
; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72
; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76
; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80
; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84
; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88
; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92
; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96
; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100
; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104
; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108
; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112
; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116
; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120
; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124
; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132
; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136
; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140
; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144
; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148
; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152
; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156
; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160
; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172
; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176
; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180
; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184
; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188
; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192
; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196
; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200
; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204
; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208
; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212
; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216
; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220
; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224
; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228
; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232
; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236
; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240
; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244
; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248
; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252
; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256
; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260
; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268
; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272
; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276
; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280
; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284
; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288
; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292
; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296
; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300
; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304
; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308
; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312
; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316
; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320
; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324
; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328
; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332
; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336
; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340
; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344
; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348
; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352
; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356
; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360
; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364
; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368
; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372
; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376
; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380
; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384
; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388
; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396
; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400
; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404
; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408
; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412
; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416
; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420
; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424
; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428
; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432
; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436
; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440
; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444
; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448
; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452
; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456
; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460
; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464
; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468
; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472
; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476
; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480
; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484
; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488
; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492
; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496
; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500
; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504
; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508
; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512
; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516
; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520
; GISEL-NEXT: s_clause 0xf
; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524
; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528
; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532
; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536
; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540
; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544
; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548
; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552
; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556
; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560
; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564
; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568
; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572
; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576
; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580
; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_clause 0x2
; GISEL-NEXT: scratch_store_b32 off, v42, s33
; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164
; GISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_writelane_b32 v42, s0, 3
; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
; GISEL-NEXT: s_addk_co_i32 s32, 0x250
; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9
; GISEL-NEXT: v_writelane_b32 v42, s4, 0
; GISEL-NEXT: v_writelane_b32 v42, s30, 1
; GISEL-NEXT: v_writelane_b32 v42, s31, 2
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-NEXT: flat_store_b32 v[40:41], v0
; GISEL-NEXT: v_readlane_b32 s31, v42, 2
; GISEL-NEXT: v_readlane_b32 s30, v42, 1
; GISEL-NEXT: v_readlane_b32 s4, v42, 0
; GISEL-NEXT: v_readlane_b32 s0, v42, 3
; GISEL-NEXT: s_clause 0x2
; GISEL-NEXT: scratch_load_b32 v42, off, s33
; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:164
; GISEL-NEXT: scratch_load_b32 v41, off, s33 offset:168
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: s_xor_b32 exec_lo, s4, -1
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4
; GISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8
; GISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12
; GISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16
; GISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20
; GISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24
; GISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28
; GISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32
; GISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36
; GISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40
; GISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44
; GISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48
; GISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52
; GISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56
; GISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60
; GISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64
; GISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68
; GISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72
; GISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76
; GISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80
; GISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84
; GISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88
; GISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92
; GISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96
; GISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100
; GISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104
; GISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108
; GISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112
; GISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116
; GISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120
; GISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124
; GISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132
; GISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136
; GISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140
; GISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144
; GISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148
; GISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152
; GISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156
; GISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160
; GISEL-NEXT: scratch_load_b32 v48, off, s33 offset:172
; GISEL-NEXT: scratch_load_b32 v49, off, s33 offset:176
; GISEL-NEXT: scratch_load_b32 v50, off, s33 offset:180
; GISEL-NEXT: scratch_load_b32 v51, off, s33 offset:184
; GISEL-NEXT: scratch_load_b32 v52, off, s33 offset:188
; GISEL-NEXT: scratch_load_b32 v53, off, s33 offset:192
; GISEL-NEXT: scratch_load_b32 v54, off, s33 offset:196
; GISEL-NEXT: scratch_load_b32 v55, off, s33 offset:200
; GISEL-NEXT: scratch_load_b32 v64, off, s33 offset:204
; GISEL-NEXT: scratch_load_b32 v65, off, s33 offset:208
; GISEL-NEXT: scratch_load_b32 v66, off, s33 offset:212
; GISEL-NEXT: scratch_load_b32 v67, off, s33 offset:216
; GISEL-NEXT: scratch_load_b32 v68, off, s33 offset:220
; GISEL-NEXT: scratch_load_b32 v69, off, s33 offset:224
; GISEL-NEXT: scratch_load_b32 v70, off, s33 offset:228
; GISEL-NEXT: scratch_load_b32 v71, off, s33 offset:232
; GISEL-NEXT: scratch_load_b32 v80, off, s33 offset:236
; GISEL-NEXT: scratch_load_b32 v81, off, s33 offset:240
; GISEL-NEXT: scratch_load_b32 v82, off, s33 offset:244
; GISEL-NEXT: scratch_load_b32 v83, off, s33 offset:248
; GISEL-NEXT: scratch_load_b32 v84, off, s33 offset:252
; GISEL-NEXT: scratch_load_b32 v85, off, s33 offset:256
; GISEL-NEXT: scratch_load_b32 v86, off, s33 offset:260
; GISEL-NEXT: scratch_load_b32 v87, off, s33 offset:264
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v96, off, s33 offset:268
; GISEL-NEXT: scratch_load_b32 v97, off, s33 offset:272
; GISEL-NEXT: scratch_load_b32 v98, off, s33 offset:276
; GISEL-NEXT: scratch_load_b32 v99, off, s33 offset:280
; GISEL-NEXT: scratch_load_b32 v100, off, s33 offset:284
; GISEL-NEXT: scratch_load_b32 v101, off, s33 offset:288
; GISEL-NEXT: scratch_load_b32 v102, off, s33 offset:292
; GISEL-NEXT: scratch_load_b32 v103, off, s33 offset:296
; GISEL-NEXT: scratch_load_b32 v112, off, s33 offset:300
; GISEL-NEXT: scratch_load_b32 v113, off, s33 offset:304
; GISEL-NEXT: scratch_load_b32 v114, off, s33 offset:308
; GISEL-NEXT: scratch_load_b32 v115, off, s33 offset:312
; GISEL-NEXT: scratch_load_b32 v116, off, s33 offset:316
; GISEL-NEXT: scratch_load_b32 v117, off, s33 offset:320
; GISEL-NEXT: scratch_load_b32 v118, off, s33 offset:324
; GISEL-NEXT: scratch_load_b32 v119, off, s33 offset:328
; GISEL-NEXT: scratch_load_b32 v128, off, s33 offset:332
; GISEL-NEXT: scratch_load_b32 v129, off, s33 offset:336
; GISEL-NEXT: scratch_load_b32 v130, off, s33 offset:340
; GISEL-NEXT: scratch_load_b32 v131, off, s33 offset:344
; GISEL-NEXT: scratch_load_b32 v132, off, s33 offset:348
; GISEL-NEXT: scratch_load_b32 v133, off, s33 offset:352
; GISEL-NEXT: scratch_load_b32 v134, off, s33 offset:356
; GISEL-NEXT: scratch_load_b32 v135, off, s33 offset:360
; GISEL-NEXT: scratch_load_b32 v144, off, s33 offset:364
; GISEL-NEXT: scratch_load_b32 v145, off, s33 offset:368
; GISEL-NEXT: scratch_load_b32 v146, off, s33 offset:372
; GISEL-NEXT: scratch_load_b32 v147, off, s33 offset:376
; GISEL-NEXT: scratch_load_b32 v148, off, s33 offset:380
; GISEL-NEXT: scratch_load_b32 v149, off, s33 offset:384
; GISEL-NEXT: scratch_load_b32 v150, off, s33 offset:388
; GISEL-NEXT: scratch_load_b32 v151, off, s33 offset:392
; GISEL-NEXT: s_clause 0x1f
; GISEL-NEXT: scratch_load_b32 v160, off, s33 offset:396
; GISEL-NEXT: scratch_load_b32 v161, off, s33 offset:400
; GISEL-NEXT: scratch_load_b32 v162, off, s33 offset:404
; GISEL-NEXT: scratch_load_b32 v163, off, s33 offset:408
; GISEL-NEXT: scratch_load_b32 v164, off, s33 offset:412
; GISEL-NEXT: scratch_load_b32 v165, off, s33 offset:416
; GISEL-NEXT: scratch_load_b32 v166, off, s33 offset:420
; GISEL-NEXT: scratch_load_b32 v167, off, s33 offset:424
; GISEL-NEXT: scratch_load_b32 v176, off, s33 offset:428
; GISEL-NEXT: scratch_load_b32 v177, off, s33 offset:432
; GISEL-NEXT: scratch_load_b32 v178, off, s33 offset:436
; GISEL-NEXT: scratch_load_b32 v179, off, s33 offset:440
; GISEL-NEXT: scratch_load_b32 v180, off, s33 offset:444
; GISEL-NEXT: scratch_load_b32 v181, off, s33 offset:448
; GISEL-NEXT: scratch_load_b32 v182, off, s33 offset:452
; GISEL-NEXT: scratch_load_b32 v183, off, s33 offset:456
; GISEL-NEXT: scratch_load_b32 v192, off, s33 offset:460
; GISEL-NEXT: scratch_load_b32 v193, off, s33 offset:464
; GISEL-NEXT: scratch_load_b32 v194, off, s33 offset:468
; GISEL-NEXT: scratch_load_b32 v195, off, s33 offset:472
; GISEL-NEXT: scratch_load_b32 v196, off, s33 offset:476
; GISEL-NEXT: scratch_load_b32 v197, off, s33 offset:480
; GISEL-NEXT: scratch_load_b32 v198, off, s33 offset:484
; GISEL-NEXT: scratch_load_b32 v199, off, s33 offset:488
; GISEL-NEXT: scratch_load_b32 v208, off, s33 offset:492
; GISEL-NEXT: scratch_load_b32 v209, off, s33 offset:496
; GISEL-NEXT: scratch_load_b32 v210, off, s33 offset:500
; GISEL-NEXT: scratch_load_b32 v211, off, s33 offset:504
; GISEL-NEXT: scratch_load_b32 v212, off, s33 offset:508
; GISEL-NEXT: scratch_load_b32 v213, off, s33 offset:512
; GISEL-NEXT: scratch_load_b32 v214, off, s33 offset:516
; GISEL-NEXT: scratch_load_b32 v215, off, s33 offset:520
; GISEL-NEXT: s_clause 0xf
; GISEL-NEXT: scratch_load_b32 v224, off, s33 offset:524
; GISEL-NEXT: scratch_load_b32 v225, off, s33 offset:528
; GISEL-NEXT: scratch_load_b32 v226, off, s33 offset:532
; GISEL-NEXT: scratch_load_b32 v227, off, s33 offset:536
; GISEL-NEXT: scratch_load_b32 v228, off, s33 offset:540
; GISEL-NEXT: scratch_load_b32 v229, off, s33 offset:544
; GISEL-NEXT: scratch_load_b32 v230, off, s33 offset:548
; GISEL-NEXT: scratch_load_b32 v231, off, s33 offset:552
; GISEL-NEXT: scratch_load_b32 v240, off, s33 offset:556
; GISEL-NEXT: scratch_load_b32 v241, off, s33 offset:560
; GISEL-NEXT: scratch_load_b32 v242, off, s33 offset:564
; GISEL-NEXT: scratch_load_b32 v243, off, s33 offset:568
; GISEL-NEXT: scratch_load_b32 v244, off, s33 offset:572
; GISEL-NEXT: scratch_load_b32 v245, off, s33 offset:576
; GISEL-NEXT: scratch_load_b32 v246, off, s33 offset:580
; GISEL-NEXT: scratch_load_b32 v247, off, s33 offset:584
; GISEL-NEXT: s_mov_b32 exec_lo, s4
; GISEL-NEXT: s_mov_b32 s33, s0
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: call_from_whole_wave:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_mov_b32 s0, s33
; DAGISEL64-NEXT: s_mov_b32 s33, s32
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12
; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16
; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20
; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24
; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28
; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32
; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36
; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40
; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44
; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48
; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52
; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56
; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60
; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64
; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68
; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72
; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76
; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80
; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84
; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88
; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92
; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96
; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100
; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104
; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108
; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112
; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116
; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120
; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124
; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132
; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136
; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140
; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144
; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148
; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152
; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156
; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160
; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172
; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176
; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180
; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184
; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188
; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192
; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196
; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200
; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204
; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208
; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212
; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216
; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220
; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224
; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228
; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232
; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236
; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240
; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244
; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248
; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252
; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256
; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260
; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268
; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272
; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276
; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280
; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284
; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288
; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292
; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296
; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300
; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304
; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308
; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312
; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316
; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320
; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324
; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328
; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332
; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336
; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340
; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344
; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348
; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352
; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356
; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360
; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364
; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368
; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372
; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376
; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380
; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384
; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388
; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396
; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400
; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404
; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408
; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412
; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416
; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420
; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424
; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428
; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432
; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436
; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440
; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444
; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448
; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452
; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456
; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460
; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464
; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468
; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472
; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476
; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480
; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484
; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488
; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492
; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496
; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500
; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504
; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508
; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512
; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516
; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520
; DAGISEL64-NEXT: s_clause 0xf
; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524
; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528
; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532
; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536
; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540
; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544
; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548
; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552
; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556
; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560
; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564
; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568
; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572
; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576
; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580
; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_clause 0x2
; DAGISEL64-NEXT: scratch_store_b32 off, v42, s33
; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164
; DAGISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_writelane_b32 v42, s0, 4
; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi
; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo
; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250
; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9
; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0
; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8
; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1
; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2
; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0
; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3
; DAGISEL64-NEXT: v_readlane_b32 s30, v42, 2
; DAGISEL64-NEXT: v_readlane_b32 s5, v42, 1
; DAGISEL64-NEXT: v_readlane_b32 s4, v42, 0
; DAGISEL64-NEXT: v_readlane_b32 s0, v42, 4
; DAGISEL64-NEXT: s_clause 0x2
; DAGISEL64-NEXT: scratch_load_b32 v42, off, s33
; DAGISEL64-NEXT: scratch_load_b32 v40, off, s33 offset:164
; DAGISEL64-NEXT: scratch_load_b32 v41, off, s33 offset:168
; DAGISEL64-NEXT: s_mov_b32 s32, s33
; DAGISEL64-NEXT: s_xor_b64 exec, s[4:5], -1
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12
; DAGISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16
; DAGISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20
; DAGISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24
; DAGISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28
; DAGISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32
; DAGISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36
; DAGISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40
; DAGISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44
; DAGISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48
; DAGISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52
; DAGISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56
; DAGISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60
; DAGISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64
; DAGISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68
; DAGISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72
; DAGISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76
; DAGISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80
; DAGISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84
; DAGISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88
; DAGISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92
; DAGISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96
; DAGISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100
; DAGISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104
; DAGISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108
; DAGISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112
; DAGISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116
; DAGISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120
; DAGISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124
; DAGISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132
; DAGISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136
; DAGISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140
; DAGISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144
; DAGISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148
; DAGISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152
; DAGISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156
; DAGISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160
; DAGISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:172
; DAGISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:176
; DAGISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:180
; DAGISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:184
; DAGISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:188
; DAGISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:192
; DAGISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:196
; DAGISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:200
; DAGISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:204
; DAGISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:208
; DAGISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:212
; DAGISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:216
; DAGISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:220
; DAGISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:224
; DAGISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:228
; DAGISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:232
; DAGISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:236
; DAGISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:240
; DAGISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:244
; DAGISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:248
; DAGISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:252
; DAGISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:256
; DAGISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:260
; DAGISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:264
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:268
; DAGISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:272
; DAGISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:276
; DAGISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:280
; DAGISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:284
; DAGISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:288
; DAGISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:292
; DAGISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:296
; DAGISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:300
; DAGISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:304
; DAGISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:308
; DAGISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:312
; DAGISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:316
; DAGISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:320
; DAGISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:324
; DAGISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:328
; DAGISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:332
; DAGISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:336
; DAGISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:340
; DAGISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:344
; DAGISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:348
; DAGISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:352
; DAGISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:356
; DAGISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:360
; DAGISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:364
; DAGISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:368
; DAGISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:372
; DAGISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:376
; DAGISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:380
; DAGISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:384
; DAGISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:388
; DAGISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:392
; DAGISEL64-NEXT: s_clause 0x1f
; DAGISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:396
; DAGISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:400
; DAGISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:404
; DAGISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:408
; DAGISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:412
; DAGISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:416
; DAGISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:420
; DAGISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:424
; DAGISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:428
; DAGISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:432
; DAGISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:436
; DAGISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:440
; DAGISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:444
; DAGISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:448
; DAGISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:452
; DAGISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:456
; DAGISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:460
; DAGISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:464
; DAGISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:468
; DAGISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:472
; DAGISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:476
; DAGISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:480
; DAGISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:484
; DAGISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:488
; DAGISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:492
; DAGISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:496
; DAGISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:500
; DAGISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:504
; DAGISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:508
; DAGISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:512
; DAGISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:516
; DAGISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:520
; DAGISEL64-NEXT: s_clause 0xf
; DAGISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:524
; DAGISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:528
; DAGISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:532
; DAGISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:536
; DAGISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:540
; DAGISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:544
; DAGISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:548
; DAGISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:552
; DAGISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:556
; DAGISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:560
; DAGISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:564
; DAGISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:568
; DAGISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:572
; DAGISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:576
; DAGISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:580
; DAGISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:584
; DAGISEL64-NEXT: s_mov_b64 exec, s[4:5]
; DAGISEL64-NEXT: s_mov_b32 s33, s0
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: call_from_whole_wave:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_mov_b32 s0, s33
; GISEL64-NEXT: s_mov_b32 s33, s32
; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12
; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16
; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20
; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24
; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28
; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32
; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36
; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40
; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44
; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48
; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52
; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56
; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60
; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64
; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68
; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72
; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76
; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80
; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84
; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88
; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92
; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96
; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100
; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104
; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108
; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112
; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116
; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120
; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124
; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132
; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136
; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140
; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144
; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148
; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152
; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156
; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160
; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172
; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176
; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180
; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184
; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188
; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192
; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196
; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200
; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204
; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208
; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212
; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216
; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220
; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224
; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228
; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232
; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236
; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240
; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244
; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248
; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252
; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256
; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260
; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268
; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272
; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276
; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280
; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284
; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288
; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292
; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296
; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300
; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304
; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308
; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312
; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316
; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320
; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324
; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328
; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332
; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336
; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340
; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344
; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348
; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352
; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356
; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360
; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364
; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368
; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372
; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376
; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380
; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384
; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388
; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396
; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400
; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404
; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408
; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412
; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416
; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420
; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424
; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428
; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432
; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436
; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440
; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444
; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448
; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452
; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456
; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460
; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464
; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468
; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472
; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476
; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480
; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484
; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488
; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492
; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496
; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500
; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504
; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508
; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512
; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516
; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520
; GISEL64-NEXT: s_clause 0xf
; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524
; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528
; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532
; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536
; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540
; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544
; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548
; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552
; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556
; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560
; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564
; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568
; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572
; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576
; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580
; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_clause 0x2
; GISEL64-NEXT: scratch_store_b32 off, v42, s33
; GISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164
; GISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_writelane_b32 v42, s0, 4
; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo
; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi
; GISEL64-NEXT: s_addk_co_i32 s32, 0x250
; GISEL64-NEXT: v_mov_b32_e32 v40, v8
; GISEL64-NEXT: v_writelane_b32 v42, s4, 0
; GISEL64-NEXT: v_mov_b32_e32 v41, v9
; GISEL64-NEXT: v_writelane_b32 v42, s5, 1
; GISEL64-NEXT: v_writelane_b32 v42, s30, 2
; GISEL64-NEXT: v_writelane_b32 v42, s31, 3
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL64-NEXT: flat_store_b32 v[40:41], v0
; GISEL64-NEXT: v_readlane_b32 s31, v42, 3
; GISEL64-NEXT: v_readlane_b32 s30, v42, 2
; GISEL64-NEXT: v_readlane_b32 s5, v42, 1
; GISEL64-NEXT: v_readlane_b32 s4, v42, 0
; GISEL64-NEXT: v_readlane_b32 s0, v42, 4
; GISEL64-NEXT: s_clause 0x2
; GISEL64-NEXT: scratch_load_b32 v42, off, s33
; GISEL64-NEXT: scratch_load_b32 v40, off, s33 offset:164
; GISEL64-NEXT: scratch_load_b32 v41, off, s33 offset:168
; GISEL64-NEXT: s_mov_b32 s32, s33
; GISEL64-NEXT: s_xor_b64 exec, s[4:5], -1
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4
; GISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8
; GISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12
; GISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16
; GISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20
; GISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24
; GISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28
; GISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32
; GISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36
; GISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40
; GISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44
; GISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48
; GISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52
; GISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56
; GISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60
; GISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64
; GISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68
; GISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72
; GISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76
; GISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80
; GISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84
; GISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88
; GISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92
; GISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96
; GISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100
; GISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104
; GISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108
; GISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112
; GISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116
; GISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120
; GISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124
; GISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132
; GISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136
; GISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140
; GISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144
; GISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148
; GISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152
; GISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156
; GISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160
; GISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:172
; GISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:176
; GISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:180
; GISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:184
; GISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:188
; GISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:192
; GISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:196
; GISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:200
; GISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:204
; GISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:208
; GISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:212
; GISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:216
; GISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:220
; GISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:224
; GISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:228
; GISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:232
; GISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:236
; GISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:240
; GISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:244
; GISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:248
; GISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:252
; GISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:256
; GISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:260
; GISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:264
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:268
; GISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:272
; GISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:276
; GISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:280
; GISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:284
; GISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:288
; GISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:292
; GISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:296
; GISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:300
; GISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:304
; GISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:308
; GISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:312
; GISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:316
; GISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:320
; GISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:324
; GISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:328
; GISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:332
; GISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:336
; GISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:340
; GISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:344
; GISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:348
; GISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:352
; GISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:356
; GISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:360
; GISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:364
; GISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:368
; GISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:372
; GISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:376
; GISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:380
; GISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:384
; GISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:388
; GISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:392
; GISEL64-NEXT: s_clause 0x1f
; GISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:396
; GISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:400
; GISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:404
; GISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:408
; GISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:412
; GISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:416
; GISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:420
; GISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:424
; GISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:428
; GISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:432
; GISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:436
; GISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:440
; GISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:444
; GISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:448
; GISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:452
; GISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:456
; GISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:460
; GISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:464
; GISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:468
; GISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:472
; GISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:476
; GISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:480
; GISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:484
; GISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:488
; GISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:492
; GISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:496
; GISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:500
; GISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:504
; GISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:508
; GISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:512
; GISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:516
; GISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:520
; GISEL64-NEXT: s_clause 0xf
; GISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:524
; GISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:528
; GISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:532
; GISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:536
; GISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:540
; GISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:544
; GISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:548
; GISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:552
; GISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:556
; GISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:560
; GISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:564
; GISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:568
; GISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:572
; GISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:576
; GISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:580
; GISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:584
; GISEL64-NEXT: s_mov_b64 exec, s[4:5]
; GISEL64-NEXT: s_mov_b32 s33, s0
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_setpc_b64 s[30:31]
%ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent
store float %ret, ptr %p
ret void
}