; llvm-project/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll
; Commit: Diana Picus a201f8872a
; [AMDGPU] Replace dynamic VGPR feature with attribute (#133444)
; Use a function attribute (amdgpu-dynamic-vgpr) instead of a subtarget
; feature, as requested in #130030.
; 2025-06-24 11:09:36 +02:00 (372 lines, 15 KiB, LLVM)

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-FAKE16 %s
; Make sure we use a stack pointer and allocate 112 * 4 bytes at the beginning of the stack.
; Entry-point compute shader with no stack objects: the FP (s33) is still set
; up. A 2-bit field of HW_REG_HW_ID2 (presumably whether CWSR is active —
; confirm against gfx12 docs) is read; if nonzero, s33 is bumped to
; 0x1c0 (= 112 * 4 bytes) so that area at the bottom of scratch stays reserved.
define amdgpu_cs void @amdgpu_cs() #0 {
; CHECK-LABEL: amdgpu_cs:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_cmp_lg_u32 0, s33
; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT: s_alloc_vgpr 0
; CHECK-NEXT: s_endpgm
ret void
}
; Same reservation for a compute kernel entry point: identical code to
; @amdgpu_cs above (conditional s33 = 0x1c0, then dealloc VGPRs and end).
define amdgpu_kernel void @kernel() #0 {
; CHECK-LABEL: kernel:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_cmp_lg_u32 0, s33
; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT: s_alloc_vgpr 0
; CHECK-NEXT: s_endpgm
ret void
}
; Entry point with a stack object: the volatile i8 store addresses the local
; relative to s33, which has been conditionally offset by 0x1c0 — i.e. frame
; accesses land above the reserved area. TRUE16 vs FAKE16 differ only in the
; mov that materializes the stored constant (v0.l vs full v0).
define amdgpu_cs void @with_local() #0 {
; CHECK-TRUE16-LABEL: with_local:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: with_local:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, addrspace(5)
store volatile i8 13, ptr addrspace(5) %local
ret void
}
; Check that we generate s_cselect for SP if we can fit
; the offset in an inline constant.
; Frame + a call: SP (s32) must also account for the reservation. Here both
; candidate SP values (0x1d0 reserved / 16 not reserved) fit the instruction,
; so a single s_cselect_b32 reusing the earlier s_cmp result is emitted.
define amdgpu_cs void @with_calls_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_inline_const:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT: s_cselect_b32 s32, 0x1d0, 16
; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_inline_const:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT: s_cselect_b32 s32, 0x1d0, 16
; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, addrspace(5)
store volatile i8 15, ptr addrspace(5) %local
call amdgpu_gfx void @callee(i32 71)
ret void
}
; Check that we generate s_mov + s_cmovk if we can't
; fit the offset for SP in an inline constant.
; Same as above but the alloca is 61 i32s, pushing the SP offsets out of
; s_cselect's inline-constant range: SP is set with s_movk_i32 (0x100,
; no reservation) and conditionally overwritten by s_cmovk_i32 (0x2c0).
define amdgpu_cs void @with_calls_no_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_no_inline_const:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT: s_movk_i32 s32, 0x100
; CHECK-TRUE16-NEXT: s_cmovk_i32 s32, 0x2c0
; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_no_inline_const:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT: s_movk_i32 s32, 0x100
; CHECK-FAKE16-NEXT: s_cmovk_i32 s32, 0x2c0
; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, i32 61, addrspace(5)
store volatile i8 15, ptr addrspace(5) %local
call amdgpu_gfx void @callee(i32 71)
ret void
}
; Inline asm clobbering v40/v42: no actual scratch traffic results here, but
; the conditional s33 = 0x1c0 reservation sequence is still emitted.
define amdgpu_cs void @with_spills() #0 {
; CHECK-LABEL: with_spills:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: s_cmp_lg_u32 0, s33
; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT: s_alloc_vgpr 0
; CHECK-NEXT: s_endpgm
call void asm "; spills", "~{v40},~{v42}"()
ret void
}
; 128-byte-aligned alloca: the reserved-FP value is rounded up from 0x1c0 to
; 0x200 (presumably to honor the 128-byte alignment — confirm), and SP gets
; the matching pair 0x100 / 0x300. The <32 x i32> arg is spilled in one
; 8-store clause addressed off s33.
define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
; CHECK-LABEL: realign_stack:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-NEXT: s_cmp_lg_u32 0, s33
; CHECK-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-NEXT: s_cmovk_i32 s33, 0x200
; CHECK-NEXT: s_movk_i32 s32, 0x100
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: scratch_store_b128 off, v[28:31], s33 offset:112
; CHECK-NEXT: scratch_store_b128 off, v[24:27], s33 offset:96
; CHECK-NEXT: scratch_store_b128 off, v[20:23], s33 offset:80
; CHECK-NEXT: scratch_store_b128 off, v[16:19], s33 offset:64
; CHECK-NEXT: scratch_store_b128 off, v[12:15], s33 offset:48
; CHECK-NEXT: scratch_store_b128 off, v[8:11], s33 offset:32
; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:16
; CHECK-NEXT: scratch_store_b128 off, v[0:3], s33
; CHECK-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-NEXT: s_cmovk_i32 s32, 0x300
; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-NEXT: s_alloc_vgpr 0
; CHECK-NEXT: s_endpgm
%v = alloca <32 x i32>, align 128, addrspace(5)
store <32 x i32> %x, ptr addrspace(5) %v
call amdgpu_gfx void @callee(i32 71)
ret void
}
; "frame-pointer"="none" (#1): the reservation still forces s33 to be
; materialized — output is identical to @with_local above.
define amdgpu_cs void @frame_pointer_none() #1 {
; CHECK-TRUE16-LABEL: frame_pointer_none:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_none:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, addrspace(5)
store volatile i8 13, ptr addrspace(5) %local
ret void
}
; "frame-pointer"="all" (#2): same output again — the FP setup already
; required by the reservation satisfies the attribute.
define amdgpu_cs void @frame_pointer_all() #2 {
; CHECK-TRUE16-LABEL: frame_pointer_all:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_all:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, addrspace(5)
store volatile i8 13, ptr addrspace(5) %local
ret void
}
; Non-entry functions and graphics shaders don't need to worry about CWSR.
; Graphics shader (amdgpu_gs): no s_getreg/s_cmovk reservation sequence —
; locals are addressed from offset 0 ("off") and SP is a plain constant.
define amdgpu_gs void @amdgpu_gs() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gs:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT: s_mov_b32 s32, 16
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT: s_alloc_vgpr 0
; CHECK-TRUE16-NEXT: s_endpgm
;
; CHECK-FAKE16-LABEL: amdgpu_gs:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT: s_mov_b32 s32, 16
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT: s_alloc_vgpr 0
; CHECK-FAKE16-NEXT: s_endpgm
%local = alloca i32, addrspace(5)
store volatile i8 15, ptr addrspace(5) %local
call amdgpu_gfx void @callee(i32 71)
ret void
}
; Non-entry (amdgpu_gfx) function: no CWSR reservation — a standard callee
; prologue/epilogue instead (save s33, spill v40 under saveexec, writelane
; s30/s31, adjust SP by 16, and restore everything before s_setpc).
define amdgpu_gfx void @amdgpu_gfx() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gfx:
; CHECK-TRUE16: ; %bb.0:
; CHECK-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-TRUE16-NEXT: s_wait_expcnt 0x0
; CHECK-TRUE16-NEXT: s_wait_samplecnt 0x0
; CHECK-TRUE16-NEXT: s_wait_bvhcnt 0x0
; CHECK-TRUE16-NEXT: s_wait_kmcnt 0x0
; CHECK-TRUE16-NEXT: s_mov_b32 s0, s33
; CHECK-TRUE16-NEXT: s_mov_b32 s33, s32
; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16
; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-TRUE16-NEXT: s_mov_b32 s32, s33
; CHECK-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT: s_mov_b32 s33, s0
; CHECK-TRUE16-NEXT: s_wait_loadcnt 0x0
; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-FAKE16-LABEL: amdgpu_gfx:
; CHECK-FAKE16: ; %bb.0:
; CHECK-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-FAKE16-NEXT: s_wait_expcnt 0x0
; CHECK-FAKE16-NEXT: s_wait_samplecnt 0x0
; CHECK-FAKE16-NEXT: s_wait_bvhcnt 0x0
; CHECK-FAKE16-NEXT: s_wait_kmcnt 0x0
; CHECK-FAKE16-NEXT: s_mov_b32 s0, s33
; CHECK-FAKE16-NEXT: s_mov_b32 s33, s32
; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16
; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-FAKE16-NEXT: s_mov_b32 s32, s33
; CHECK-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT: s_mov_b32 s33, s0
; CHECK-FAKE16-NEXT: s_wait_loadcnt 0x0
; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT: s_setpc_b64 s[30:31]
%local = alloca i32, addrspace(5)
store volatile i8 15, ptr addrspace(5) %local
call amdgpu_gfx void @callee(i32 71)
ret void
}
; Default (C) calling convention: also a non-entry function, so no
; reservation code — just the gfx12 wait-count prologue and return.
define void @default() #0 {
; CHECK-LABEL: default:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_expcnt 0x0
; CHECK-NEXT: s_wait_samplecnt 0x0
; CHECK-NEXT: s_wait_bvhcnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: s_setpc_b64 s[30:31]
ret void
}
declare amdgpu_gfx void @callee(i32) #0
; Dynamic-VGPR mode is requested per function through the
; "amdgpu-dynamic-vgpr-block-size" attribute (block size 16 here);
; #1 and #2 additionally pin the frame-pointer policy exercised above.
attributes #0 = { nounwind "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { nounwind "frame-pointer"="none" "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #2 = { nounwind "frame-pointer"="all" "amdgpu-dynamic-vgpr-block-size"="16" }