llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
Diana Picus f2e8e2faff
[AMDGPU] Make chain functions receive a stack pointer (#184616)
Currently, chain functions are free to set up a stack pointer if they
need one, and they assume they can start at scratch offset 0. This is
not correct if CWSR and dynamic VGPRs are both enabled, since in that
case we need to reserve an area at offset 0 for the trap handler, but
only when running on a compute queue (which we determine at runtime).
Rather than duplicate in every chain function the code sequence for
determining if/how much scratch space needs to be reserved, this patch
changes the ABI of chain functions so that they receive a stack pointer
from their caller.

Since chain functions can no longer use plain offsets to access their
own stack, we'll also need to allocate a frame pointer more often (and
sometimes also a base pointer). For simplicity, we use the same
registers that `amdgpu_gfx` functions do (s32, s33, s34). This may
change in the future. Chain functions never return to their caller and
thus don't need to preserve the frame or base pointer.

Another consequence is that now we might need to realign the stack in
some cases (since it no longer starts at the infinitely aligned 0).
2026-03-06 11:01:42 +01:00

1192 lines
56 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GISEL-GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefix=GISEL-GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
declare amdgpu_gfx void @use(...)
; A chain function that takes one inreg and one non-inreg struct argument and
; returns immediately without touching memory or calling anything.
; The expected code for every run configuration is only the entry wait followed
; by s_endpgm: no write to s32/s33 and no scratch access appears.
define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_endpgm
ret void
}
; A chain function that forwards its two <4 x i32> arguments to the amdgpu_gfx
; function @use and then ends the program.
; Per the expected code, the inreg argument is read from s0-s3 and the
; non-inreg argument from v8-v11; they are moved to v0-v3 and v4-v7 for the
; call. All eight values fit in registers, so no scratch store and no write to
; s32/s33 is expected. On the GFX10 runs s[48:51] is additionally copied into
; s[0:3] before the s_swappc_b64.
define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX10-NEXT: s_endpgm
call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr)
ret void
}
; A chain function that forwards 24 inreg dwords and 24 non-inreg dwords to the
; amdgpu_gfx function @use, which is more than the v0-v31 the expected code
; uses for the outgoing call.
; Per the expected code:
;   - the inreg values in s0-s23 are copied to v0-v23;
;   - incoming v8-v15 end up in v24-v31, going through v32-v39 as temporaries;
;   - incoming v16-v31 are stored to scratch at byte offsets 0 through 60 from
;     the incoming s32.
; On the GFX11 runs each store is a scratch_store_b32 whose address is s32 or
; s32 plus a constant computed into s24/s25. On the GFX10 runs each store is a
; buffer_store_dword using s[48:51], s32 and an immediate offset, and s[48:51]
; is copied into s[0:3] before the call. s32 itself is never modified.
define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 8
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 12
; GISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 16
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 20
; GISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 24
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 28
; GISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 32
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 36
; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40
; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 44
; GISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 48
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 52
; GISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 56
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 60
; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s25
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; GISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
; GISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
; GISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
; GISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
; GISEL-GFX11-NEXT: v_dual_mov_b32 v24, v32 :: v_dual_mov_b32 v25, v33
; GISEL-GFX11-NEXT: v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35
; GISEL-GFX11-NEXT: v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37
; GISEL-GFX11-NEXT: v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39
; GISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v35, v11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v36, v12
; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13
; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14
; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
; GISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
; GISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
; GISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
; GISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
; GISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
; GISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
; GISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
; GISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
; GISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
; GISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
; GISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
; GISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
; GISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
; GISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
; GISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
; GISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
; GISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
; GISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
; GISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
; GISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
; GISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
; GISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
; GISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
; GISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
; GISEL-GFX10-NEXT: v_mov_b32_e32 v24, v32
; GISEL-GFX10-NEXT: v_mov_b32_e32 v25, v33
; GISEL-GFX10-NEXT: v_mov_b32_e32 v26, v34
; GISEL-GFX10-NEXT: v_mov_b32_e32 v27, v35
; GISEL-GFX10-NEXT: v_mov_b32_e32 v28, v36
; GISEL-GFX10-NEXT: v_mov_b32_e32 v29, v37
; GISEL-GFX10-NEXT: v_mov_b32_e32 v30, v38
; GISEL-GFX10-NEXT: v_mov_b32_e32 v31, v39
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 56
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 52
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 48
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 44
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 40
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 36
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 32
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 28
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 20
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 16
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 12
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 8
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 4
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s25
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v24, v39 :: v_dual_mov_b32 v25, v38
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32
; DAGISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v35, v12
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v36, v11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
; DAGISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
; DAGISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
; DAGISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
; DAGISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
; DAGISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
; DAGISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
; DAGISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
; DAGISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
; DAGISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
; DAGISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
; DAGISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
; DAGISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
; DAGISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v24, v39
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v25, v38
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v26, v37
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v27, v36
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v28, v35
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v29, v34
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v30, v33
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v31, v32
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
; DAGISEL-GFX10-NEXT: s_endpgm
call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs)
ret void
}
; A chain function with a local [3 x i32] alloca in addrspace(5): it stores 42
; to the alloca and passes the alloca's address to the amdgpu_gfx function @use.
; Per the expected code, s33 is set to the incoming s32 and is used both as the
; address of the store and as the source of the pointer passed in v0. s32 is
; then advanced before the call: by 16 on the GFX11 runs and by 0x200 on the
; GFX10 runs. On GFX10 the pointer handed to the callee is s33 shifted right
; by 5 (0x200 is likewise 16 shifted left by 5), and s[48:51] is copied into
; s[0:3] before the call.
define amdgpu_cs_chain void @alloca_and_call() {
; GISEL-GFX11-LABEL: alloca_and_call:
; GISEL-GFX11: ; %bb.0: ; %.entry
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
; GISEL-GFX11-NEXT: s_mov_b32 s33, s32
; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
; GISEL-GFX11-NEXT: s_add_i32 s32, s32, 16
; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, s33
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s33
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: alloca_and_call:
; GISEL-GFX10: ; %bb.0: ; %.entry
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
; GISEL-GFX10-NEXT: s_mov_b32 s33, s32
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], s33
; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: s_addk_i32 s32, 0x200
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: alloca_and_call:
; DAGISEL-GFX11: ; %bb.0: ; %.entry
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
; DAGISEL-GFX11-NEXT: s_mov_b32 s33, s32
; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_add_i32 s32, s32, 16
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, s33
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, s33
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: alloca_and_call:
; DAGISEL-GFX10: ; %bb.0: ; %.entry
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
; DAGISEL-GFX10-NEXT: s_mov_b32 s33, s32
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], s33
; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: s_addk_i32 s32, 0x200
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX10-NEXT: s_endpgm
.entry:
%v = alloca [3 x i32], addrspace(5)
store i32 42, ptr addrspace(5) %v
call amdgpu_gfx void @use(ptr addrspace(5) %v)
ret void
}
; An amdgpu_cs entry point that jumps to @chain_callee through
; llvm.amdgcn.cs.chain, passing %a as the inreg argument and %b as the
; non-inreg argument.
; Per the expected code:
;   - the caller writes 0 to s32 before the jump;
;   - exec_lo is set to -1 (the i32 -1 operand of the intrinsic call) and the
;     transfer is an s_setpc_b64, with nothing emitted after it;
;   - %b, which arrives in v0-v2, is placed in v8-v10 and %a stays in s0-s2;
;     v0 and s0 are parked in v3 and s3 across the inline asm;
;   - on the GFX10 runs s[100:103] is built with s_getpc_b64, s_load_dwordx4,
;     s_bitset0_b32 and s_add_u32/s_addc_u32, then copied into s[48:51].
define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; The inline asm clobbers v0, v8, v16 and s0, so argument values held in those
; registers have to be kept elsewhere across it.
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; Chain call with SGPR arguments that we cannot prove are uniform.
; %a is a plain (non-inreg) parameter of this function but is passed as the
; inreg argument of the chain call. Per the expected code, each of its three
; elements is moved from v0-v2 into s0-s2 with v_readfirstlane_b32, while %b is
; copied from v3-v5 to v8-v10. As an amdgpu_cs caller, this function also
; writes 0 to s32 before the jump, and on the GFX10 runs builds s[100:103] and
; copies it into s[48:51].
define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain_nonuniform:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain_nonuniform:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
; GISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s0
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0
; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1
; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s0
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; A chain function that jumps to @chain_callee through llvm.amdgcn.cs.chain,
; forwarding its own inreg argument %a and non-inreg argument %b unchanged.
; Per the expected code, the arguments are already where the callee expects
; them (s0-s2 and v8-v10). Only s0 and v8 are moved, to s3 and v1, to survive
; the inline asm, and are restored afterwards. The output contains no write to
; s32 and, on the GFX10 runs, no setup of s[48:51]. exec_lo is set to -1 and
; control transfers with s_setpc_b64.
define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; The inline asm clobbers v0, v8, v16 and s0, so argument values held in those
; registers have to be kept elsewhere across it.
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; A chain function that has a local alloca (four i32 in addrspace(5)), does a
; volatile store of 5 to it, and then jumps to @chain_callee through
; llvm.amdgcn.cs.chain.
; Per the expected code, the store addresses the local directly through the
; incoming s32 (scratch_store_b32 on the GFX11 runs, buffer_store_dword with
; s[48:51] on the GFX10 runs). No s33 copy is made and s32 is not modified
; before the jump. An s_waitcnt_vscnt follows the store, ahead of the exec_lo
; write and the s_setpc_b64.
define amdgpu_cs_chain void @chain_to_chain_local(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_local:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 5
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, s32 dlc
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_local:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 5
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], s32
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_local:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 5
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, s32 dlc
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_local:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 5
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], s32
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
%v = alloca i32, i32 4, addrspace(5)
; The store is volatile, so it stays in the output even though %v is never read.
store volatile i32 5, ptr addrspace(5) %v
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; Chain call whose first VGPR argument lane comes from llvm.amdgcn.wwm:
; set.inactive(3, 4) feeds wwm, and the result replaces element 0 of %b.
; The inline asm placed between the two intrinsics clobbers v0, v8, v16 and s0,
; so the expected code holds the computed value in v1/v2 and the incoming s0 in
; s3 across the asm, then moves them into v8 and s0 right before the jump.
define amdgpu_cs_chain void @chain_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_wwm:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, s0
; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_wwm:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; Constants 3 and 4 are the two operands of the v_cndmask in the checks above.
%i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
; Clobbers registers that carry chain arguments (s0 and v8 per the checks).
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
%w = call i32 @llvm.amdgcn.wwm(i32 %i)
; Overwrite lane 0 of the VGPR argument vector with the wwm result.
%c = insertelement <3 x i32> %b, i32 %w, i32 0
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
unreachable
}
; Chain call preceded by inline asm that clobbers every register in v0-v8 as
; well as v16 and s0. With none of v0-v7 free across the asm, the expected code
; parks the incoming v8 in v11 and the incoming s0 in s3, then copies both back
; just before jumping to @chain_callee.
define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; The clobber list names v0 through v8 individually, plus v16 and s0.
call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; Chains to @chain_callee_2, which takes 2-element vectors, from a function
; that received 3-element ones: both shufflevectors keep only elements 0 and 1.
; The inline asm clobbers v0, v8, v16 and s0, so the expected code carries the
; incoming v8 in v1 and the incoming s0 in s2 across it and restores both
; before the jump.
define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s2, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s2, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; Narrow both argument vectors from three lanes to the first two.
%s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
%v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
unreachable
}
; Chains to a callee taking 4-element vectors from a function that received
; 3-element ones. Shuffle index 3 selects element 0 of the zeroinitializer
; operand, so the extra lane of each vector is 0; the expected code
; materializes it with the "s_mov_b32 s3, 0" and "v11, 0" moves.
; The callee is @chain_callee_4, whose declared parameters are <4 x i32> and
; therefore match the v4i32 intrinsic arguments (@chain_callee_2 takes
; <2 x i32> and belongs to the fewer-args test).
; NOTE(review): only the callee symbol in the relocation lines was changed by
; hand; regenerate with utils/update_llc_test_checks.py to confirm the rest.
define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_more_args:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 s3, 0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_more_args:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 s3, 0
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, 0
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; Widen both argument vectors from three lanes to four; lane 3 comes from the
; zeroinitializer operand and is therefore 0.
%s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Clobbers registers that carry chain arguments (s0 and v8 per the checks).
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_4, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0)
unreachable
}
; Chain function with a 32-byte-aligned stack object and no outgoing chain
; call. The expected code derives s33 from s32 by rounding up (add 31, clear
; the low 5 bits on GFX11; add 0x3e0, mask with 0xfffffc00 on GFX10), copies
; the incoming s32 to s34, and bumps s32 by 0xc0 (GFX11) or 0x1800 (GFX10).
; The GFX10 constants are the GFX11 ones multiplied by 32 - presumably because
; scratch offsets are wave-scaled there; confirm against the frame lowering.
; The function ends with "ret void", which the checks expect as s_endpgm.
define amdgpu_cs_chain void @amdgpu_cs_chain_realign_stack(i32 %idx) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_realign_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s3, 4
; GISEL-GFX11-NEXT: s_mov_b32 s2, 3
; GISEL-GFX11-NEXT: s_mov_b32 s1, 2
; GISEL-GFX11-NEXT: s_mov_b32 s0, 1
; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
; GISEL-GFX11-NEXT: s_mov_b32 s34, s32
; GISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
; GISEL-GFX11-NEXT: s_addk_i32 s32, 0xc0
; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, s33, v0
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_realign_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s33
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
; GISEL-GFX10-NEXT: s_mov_b32 s34, s32
; GISEL-GFX10-NEXT: s_addk_i32 s32, 0x1800
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_realign_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
; DAGISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, s33
; DAGISEL-GFX11-NEXT: s_mov_b32 s34, s32
; DAGISEL-GFX11-NEXT: s_addk_i32 s32, 0xc0
; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_realign_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
; DAGISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s33
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
; DAGISEL-GFX10-NEXT: s_mov_b32 s34, s32
; DAGISEL-GFX10-NEXT: s_addk_i32 s32, 0x1800
; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, v2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: s_endpgm
; The align 32 on this alloca is what requires the s33 rounding in the checks.
%alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
; Index the array with the runtime %idx (16-byte elements, hence the shift by
; 4 applied to v8 in the checks).
%gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
ret void
}
; Chain function with a 32-byte-aligned stack object that then chains to
; @chain_callee. The expected code rounds s32 up into s33, saves the incoming
; s32 in s34 and bumps s32 (0xc0 on GFX11, 0x1800 on GFX10); before the jump it
; restores s32 from s34. Because %idx occupies v8 on entry, the checks also
; expect v9..v11 to be moved down into v8..v10 for the callee.
define amdgpu_cs_chain void @amdgpu_cs_chain_realign_stack_chain_call(i32 %idx, <3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_realign_stack_chain_call:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s7, 4
; GISEL-GFX11-NEXT: s_mov_b32 s6, 3
; GISEL-GFX11-NEXT: s_mov_b32 s5, 2
; GISEL-GFX11-NEXT: s_mov_b32 s4, 1
; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v4, 4, v8
; GISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, s33, v4
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v11
; GISEL-GFX11-NEXT: s_mov_b32 s34, s32
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_addk_i32 s32, 0xc0
; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: s_mov_b32 s32, s34
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_realign_stack_chain_call:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s33
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
; GISEL-GFX10-NEXT: s_mov_b32 s34, s32
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_addk_i32 s32, 0x1800
; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: s_mov_b32 s32, s34
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_realign_stack_chain_call:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
; DAGISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, s33
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v11
; DAGISEL-GFX11-NEXT: s_mov_b32 s34, s32
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_addk_i32 s32, 0xc0
; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, s34
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_realign_stack_chain_call:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
; DAGISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33
; DAGISEL-GFX10-NEXT: s_mov_b32 s34, s32
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v0, v8, 4, v1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v11
; DAGISEL-GFX10-NEXT: s_addk_i32 s32, 0x1800
; DAGISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen offset:12
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:8
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:4
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, s34
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; The align 32 on this alloca is what requires the s33 rounding in the checks.
%alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
; Index the array with the runtime %idx (16-byte elements, hence the shift by
; 4 applied to v8 in the checks).
%gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
; Unlike the ret-void variant, control leaves through a chain call here, so
; the checks end in s_setpc_b64 instead of s_endpgm.
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
unreachable
}
; Chain-call intrinsic, one overload per argument vector width used above.
; Operands as used in this file: callee pointer, i32 exec value, the inreg
; vector, the non-inreg vector, a trailing i32, then varargs.
declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)
; External chain callees, one per argument vector width (2, 3 and 4 lanes).
declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
; Intrinsics used by the wwm chain-call test.
declare i32 @llvm.amdgcn.set.inactive(i32, i32)
declare i32 @llvm.amdgcn.wwm(i32)