
; Uses the function attribute "amdgpu-dynamic-vgpr-block-size" instead of a
; subtarget feature, as requested in #130030.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-FAKE16 %s

; Make sure we use a stack pointer and allocate 112 * 4 bytes at the beginning of the stack.
define amdgpu_cs void @amdgpu_cs() #0 {
; CHECK-LABEL: amdgpu_cs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  ret void
}

define amdgpu_kernel void @kernel() #0 {
; CHECK-LABEL: kernel:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  ret void
}

define amdgpu_cs void @with_local() #0 {
; CHECK-TRUE16-LABEL: with_local:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_local:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

; Check that we generate s_cselect for SP if we can fit
; the offset in an inline constant.
define amdgpu_cs void @with_calls_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_inline_const:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_cselect_b32 s32, 0x1d0, 16
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_inline_const:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_cselect_b32 s32, 0x1d0, 16
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; Check that we generate s_mov + s_cmovk if we can't
; fit the offset for SP in an inline constant.
define amdgpu_cs void @with_calls_no_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_no_inline_const:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_movk_i32 s32, 0x100
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s32, 0x2c0
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_no_inline_const:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_movk_i32 s32, 0x100
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s32, 0x2c0
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, i32 61, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

define amdgpu_cs void @with_spills() #0 {
; CHECK-LABEL: with_spills:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  call void asm "; spills", "~{v40},~{v42}"()
  ret void
}

define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
; CHECK-LABEL: realign_stack:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-NEXT:    s_cmovk_i32 s33, 0x200
; CHECK-NEXT:    s_movk_i32 s32, 0x100
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:112
; CHECK-NEXT:    scratch_store_b128 off, v[24:27], s33 offset:96
; CHECK-NEXT:    scratch_store_b128 off, v[20:23], s33 offset:80
; CHECK-NEXT:    scratch_store_b128 off, v[16:19], s33 offset:64
; CHECK-NEXT:    scratch_store_b128 off, v[12:15], s33 offset:48
; CHECK-NEXT:    scratch_store_b128 off, v[8:11], s33 offset:32
; CHECK-NEXT:    scratch_store_b128 off, v[4:7], s33 offset:16
; CHECK-NEXT:    scratch_store_b128 off, v[0:3], s33
; CHECK-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-NEXT:    s_cmovk_i32 s32, 0x300
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  %v = alloca <32 x i32>, align 128, addrspace(5)
  store <32 x i32> %x, ptr addrspace(5) %v
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

define amdgpu_cs void @frame_pointer_none() #1 {
; CHECK-TRUE16-LABEL: frame_pointer_none:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_none:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

define amdgpu_cs void @frame_pointer_all() #2 {
; CHECK-TRUE16-LABEL: frame_pointer_all:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_all:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

; Non-entry functions and graphics shaders don't need to worry about CWSR.
define amdgpu_gs void @amdgpu_gs() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gs:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    s_mov_b32 s32, 16
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: amdgpu_gs:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    s_mov_b32 s32, 16
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

define amdgpu_gfx void @amdgpu_gfx() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gfx:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_expcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_samplecnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_bvhcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_kmcnt 0x0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s33, s32
; CHECK-TRUE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT:    scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s0, 2
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    s_add_co_i32 s32, s32, 16
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s30, 0
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s31, 1
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; CHECK-TRUE16-NEXT:    v_readlane_b32 s31, v40, 1
; CHECK-TRUE16-NEXT:    v_readlane_b32 s30, v40, 0
; CHECK-TRUE16-NEXT:    s_mov_b32 s32, s33
; CHECK-TRUE16-NEXT:    v_readlane_b32 s0, v40, 2
; CHECK-TRUE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT:    scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT:    s_mov_b32 s33, s0
; CHECK-TRUE16-NEXT:    s_wait_loadcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-FAKE16-LABEL: amdgpu_gfx:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_expcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_samplecnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_bvhcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_kmcnt 0x0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s33, s32
; CHECK-FAKE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT:    scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s0, 2
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    s_add_co_i32 s32, s32, 16
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s30, 0
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s31, 1
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; CHECK-FAKE16-NEXT:    v_readlane_b32 s31, v40, 1
; CHECK-FAKE16-NEXT:    v_readlane_b32 s30, v40, 0
; CHECK-FAKE16-NEXT:    s_mov_b32 s32, s33
; CHECK-FAKE16-NEXT:    v_readlane_b32 s0, v40, 2
; CHECK-FAKE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT:    scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT:    s_mov_b32 s33, s0
; CHECK-FAKE16-NEXT:    s_wait_loadcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_setpc_b64 s[30:31]
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

define void @default() #0 {
; CHECK-LABEL: default:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_expcnt 0x0
; CHECK-NEXT:    s_wait_samplecnt 0x0
; CHECK-NEXT:    s_wait_bvhcnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ret void
}

declare amdgpu_gfx void @callee(i32) #0

attributes #0 = { nounwind "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { nounwind "frame-pointer"="none" "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #2 = { nounwind "frame-pointer"="all" "amdgpu-dynamic-vgpr-block-size"="16" }