Currently, chain functions are free to set up a stack pointer if they need one, and they assume they can start at scratch offset 0. This is not correct if CWSR and dynamic VGPRs are both enabled, since in that case we need to reserve an area at offset 0 for the trap handler, but only when running on a compute queue (which we determine at runtime). Rather than duplicate in every chain function the code sequence for determining if/how much scratch space needs to be reserved, this patch changes the ABI of chain functions so that they receive a stack pointer from their caller. Since chain functions can no longer use plain offsets to access their own stack, we'll also need to allocate a frame pointer more often (and sometimes also a base pointer). For simplicity, we use the same registers that `amdgpu_gfx` functions do (s32, s33, s34). This may change in the future. Chain functions never return to their caller and thus don't need to preserve the frame or base pointer. Another consequence is that now we might need to realign the stack in some cases (since it no longer starts at the infinitely aligned 0).
741 lines
36 KiB
LLVM
741 lines
36 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GISEL-GFX11 %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefix=GISEL-GFX10 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
|
|
|
|
; Baseline case: an amdgpu_cs_chain_preserve function whose body is only
; `ret void`. All four configurations are expected to emit just the entry
; s_waitcnt followed by s_endpgm - no scratch access and no write to s32.
define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_endpgm
  ret void
}
|
|
|
|
; An amdgpu_cs entry point that chains into @chain_preserve_callee.
;
; What the checks pin down:
;  * The inline asm clobbers v0, v8, v16 and s0, so the incoming s0 is parked
;    in s3 and the incoming v0 in v3 across the asm, then moved back into s0
;    and v8 before the jump. The other two elements of %b go v1 -> v9 and
;    v2 -> v10.
;  * s32 is written with 0 before the jump in every configuration.
;  * On gfx1030 four dwords are loaded into s[100:103], adjusted, and copied
;    into s[48:51] (the register quad used as the buffer resource by the
;    chain functions' spills elsewhere in this file).
;  * exec_lo is written with -1 (the i32 -1 operand of the intrinsic) and
;    control is transferred with s_setpc_b64, not a call.
define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain_preserve:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain_preserve:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain_preserve:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain_preserve:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Clobber registers that overlap the incoming and outgoing argument
  ; registers so the checks show the values being moved out of the way.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
|
|
|
; An amdgpu_cs_chain function that chains into @chain_preserve_callee.
;
; The inline asm clobbers v0, v8, v16 and s0. The checks show only the live
; argument registers being protected: v8 is held in v1 and s0 in s3 across the
; asm and moved back before the jump. Unlike the amdgpu_cs_chain_preserve
; callers in this file, there is no spill of v16, and s32 is not written
; before the s_setpc_b64.
define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_preserve:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_preserve:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_preserve:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_preserve:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Clobber the first argument registers (s0, v8) plus v0 and v16.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
|
|
|
; An amdgpu_cs_chain_preserve function that chains into another
; chain_preserve function (@chain_preserve_callee).
;
; The inline asm clobbers v0, v8, v16 and s0. The checks show:
;  * v16 is stored to scratch before the asm and reloaded after it, addressed
;    relative to the incoming s32 (scratch_* on gfx1100, buffer_* through
;    s[48:51] on gfx1030);
;  * the live argument registers v8 and s0 are held in v1 and s3 across the
;    asm and moved back before the jump;
;  * v0 is clobbered without being saved.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; v16 lies outside the three VGPR arguments (v8-v10); clobbering it is what
  ; triggers the spill/reload pair seen in the checks.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
|
|
|
; An amdgpu_cs_chain_preserve function that chains into @chain_callee rather
; than a chain_preserve callee.
;
; The expected code has the same shape as chain_preserve_to_chain_preserve:
; v16 is spilled relative to the incoming s32 and reloaded after the asm,
; v8 and s0 are held in v1 and s3 across the asm, and only the jump target
; symbol differs.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Clobber the first argument registers (s0, v8) plus v0 and v16.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
|
|
|
; Chain call from an amdgpu_cs_chain_preserve function where one of the
; outgoing VGPR arguments is produced under whole wave mode.
;
; The IR computes set.inactive(3, 4), passes it through llvm.amdgcn.wwm and
; inserts the result as element 0 of the VGPR argument vector. The checks
; show the value being formed with s_or_saveexec_b32 ..., -1 followed by a
; v_cndmask_b32 that selects between 4 and 3 using the saved exec mask, after
; which exec_lo is restored from that saved mask. The result reaches v8 via v1 and v2.
; As in the other chain_preserve callers, v16 is spilled relative to s32 and
; reloaded, and s0 is held in s3 across the asm.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_wwm:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_wwm:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_wwm:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_wwm:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Value 3 with 4 supplied for the inactive operand; consumed via wwm below.
  %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
  ; The clobbers sit between the set.inactive and its wwm use, so the value
  ; has to stay live across the asm in a register the asm does not touch.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  %w = call i32 @llvm.amdgcn.wwm(i32 %i)
  ; Replace element 0 of the VGPR argument vector with the WWM result.
  %c = insertelement <3 x i32> %b, i32 %w, i32 0
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
  unreachable
}
|
|
|
|
; Chain call from an amdgpu_cs_chain_preserve function when every one of
; v0-v7 is clobbered in addition to v8, v16 and s0.
;
; With v0-v7 unavailable, the checks show the live argument in v8 being held
; in v11 across the asm. v11 is first stored to scratch at s32 offset:4 and
; reloaded before the jump; v16 is spilled at s32 and reloaded as in the
; other chain_preserve callers, and s0 is held in s3.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_clause 0x1 ; 8-byte Folded Spill
; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, s32 offset:4
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
; GISEL-GFX11-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32
; GISEL-GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:4
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], s32 offset:4 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
; GISEL-GFX10-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32
; GISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], s32 offset:4
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_clause 0x1 ; 8-byte Folded Spill
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, s32 offset:4
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
; DAGISEL-GFX11-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32
; DAGISEL-GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:4
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], s32 offset:4 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
; DAGISEL-GFX10-NEXT: s_clause 0x1 ; 8-byte Folded Reload
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32
; DAGISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], s32 offset:4
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Clobber all of v0-v7 so none of them can hold v8 across the asm.
  call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
|
|
|
; Chain call that forwards fewer arguments than the caller received: the
; caller takes two <3 x i32> vectors, keeps only elements 0 and 1 of each via
; shufflevector, and chains into @chain_preserve_callee_2 through the v2i32
; variant of the intrinsic.
;
; The expected code mirrors chain_preserve_to_chain_preserve (v16 spilled at
; s32 and reloaded, v8 held in v1 across the asm), except that s0 is held in
; s2 instead of s3.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; GISEL-GFX11-NEXT: s_mov_b32 s2, s0
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT: s_mov_b32 s2, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s32 ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s32 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Keep only the first two elements of each incoming vector.
  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  ; Clobber the first argument registers (s0, v8) plus v0 and v16.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_preserve_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
  unreachable
}
|
|
|
|
; Note that amdgpu_cs_chain_preserve functions are not allowed to call
; llvm.amdgcn.cs.chain with more VGPR args than they received as parameters.
|
|
|
|
; Stack realignment test for an amdgpu_cs_chain_preserve function.
;
; The IR body at the end of this function allocates an align-32 private
; (addrspace(5)) array of eight <4 x i32>, does a volatile store of
; <1, 2, 3, 4> to the element selected by %idx, and then chains to
; @chain_preserve_callee through llvm.amdgcn.cs.chain.
;
; What the autogenerated checks below show for every RUN configuration:
;  - s33 is derived from the incoming s32 by rounding up (add 31, then clear
;    the low five bits on gfx1100; add 0x3e0, then mask with 0xfffffc00 on
;    gfx1030). s33 is then the base for the v16 spill slot and, plus 32, for
;    the address of the alloca.
;  - the incoming s32 is copied to s34 and copied back into s32 shortly
;    before the final s_setpc_b64 that jumps to the callee.
;  - %idx arrives in v8 and is shifted left by 4 to index the 16-byte
;    elements; v9-v11 (%b) are moved down to v8-v10 for the callee.
; NOTE(review): the gfx1030 constants are presumably the same 32-byte
; alignment scaled by a wave size of 32 (note the shift right by 5 applied to
; s33 before it is used as a VGPR offset) - confirm.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them rather than editing by hand.
define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_realign_stack(i32 %idx, <3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_realign_stack:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s7, 4
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s6, 3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, 2
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, 1
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_lshlrev_b32 v0, 4, v8
|
|
; GISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, s6
|
|
; GISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s7 :: v_dual_add_nc_u32 v5, s33, v0
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s33 ; 4-byte Folded Spill
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_add_nc_u32 v5, 32, v5
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s34, s32
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
|
|
; GISEL-GFX11-NEXT: scratch_store_b128 v5, v[1:4], off dlc
|
|
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, s33 ; 4-byte Folded Reload
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: s_addk_i32 s32, 0xe0
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s32, s34
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_realign_stack:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
|
|
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
|
|
; GISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
|
|
; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v3, 5, s33
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 3
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, 4
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s33 ; 4-byte Folded Spill
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v2, v0, v3
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v9
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v10
|
|
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v2, 32, v2
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v11
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s34, s32
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v1, v2, s[48:51], 0 offen
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v3, v2, s[48:51], 0 offen offset:4
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v4, v2, s[48:51], 0 offen offset:8
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v5, v2, s[48:51], 0 offen offset:12
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s33 ; 4-byte Folded Reload
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: s_addk_i32 s32, 0x1c00
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s32, s34
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_realign_stack:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s33, s32, 31
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s34, s32
|
|
; DAGISEL-GFX11-NEXT: s_and_not1_b32 s33, s33, 31
|
|
; DAGISEL-GFX11-NEXT: s_addk_i32 s32, 0xe0
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s0, s33, 32
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
|
|
; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v5, v8, 4, s0
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, 3 :: v_dual_mov_b32 v4, 4
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s33 ; 4-byte Folded Spill
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b128 v5, v[1:4], off dlc
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, s33 ; 4-byte Folded Reload
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v11
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, s34
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_realign_stack:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 4
|
|
; DAGISEL-GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
|
|
; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 2
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, 1
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s33 ; 4-byte Folded Spill
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, 32, v0
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s34, s32
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_addk_i32 s32, 0x1c00
|
|
; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, v0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v9
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v10
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v11
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:12
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:8
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen offset:4
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v5, v1, s[48:51], 0 offen
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], s33 ; 4-byte Folded Reload
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, s34
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
; The inline asm emits only s_nop but declares v0, v8, v16 and s0 as clobbered.
; In the checks above this shows up as v16 being spilled to and reloaded from
; the realigned frame at s33, and s0 being copied through s3 around the asm.
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
%alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
|
|
%gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
|
|
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
|
|
; Chain to the callee: i32 -1 is the value written to exec_lo in the checks,
; %a is passed inreg, %b is passed as-is, and the trailing i32 0 is the
; intrinsic's last fixed operand.
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Chain intrinsic overload for <3 x i32> arguments, plus the callees that take
; one inreg and one regular <3 x i32> argument.
; NOTE(review): @chain_callee is not referenced in this part of the file;
; presumably it is used by tests earlier in the file - verify.
declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
|
|
declare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>)
|
|
declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
|
|
|
|
; Chain intrinsic overload for <2 x i32> arguments and its matching
; chain-preserve callee.
declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
|
|
declare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>)
|
|
|
|
; NOTE(review): the two intrinsics below are not referenced in this part of
; the file; presumably they are used by tests earlier in the file - verify.
declare i32 @llvm.amdgcn.set.inactive(i32, i32)
|
|
declare i32 @llvm.amdgcn.wwm(i32)
|