
Recent upstream trends have moved away from explicitly using `-verify-machineinstrs`, as it's already covered by the expensive checks. This PR removes almost all `-verify-machineinstrs` from tests in `llvm/test/CodeGen/AMDGPU/*.ll`, leaving only those tests where its removal currently causes failures.
917 lines
37 KiB
LLVM
917 lines
37 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GCN,GFX908 %s
|
|
|
|
; Callee with an empty body: it only returns. The shared checks expect
; nothing but the entry wait and the return through s[30:31].
define void @func_empty() #0 {
|
|
; GCN-LABEL: func_empty:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
ret void
|
|
}
|
|
|
|
; Callee whose only effect is an inline asm that lists a3 as clobbered
; ("~{a3}"); the asm text itself is just a comment. The shared checks expect
; no copy of a3 around the asm on either target.
; NOTE(review): the "_4" suffix presumably means "uses AGPRs a0-a3" - confirm.
define void @func_areg_4() #0 {
|
|
; GCN-LABEL: func_areg_4:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: ;;#ASMSTART
|
|
; GCN-NEXT: ; use agpr3
|
|
; GCN-NEXT: ;;#ASMEND
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
call void asm sideeffect "; use agpr3", "~{a3}" ()
|
|
ret void
|
|
}
|
|
|
|
; Callee whose only effect is an inline asm that lists a31 as clobbered
; ("~{a31}"); the asm text itself is just a comment. The shared checks expect
; no copy of a31 around the asm on either target.
; NOTE(review): the "_32" suffix presumably means "uses AGPRs a0-a31" - confirm.
define void @func_areg_32() #0 {
|
|
; GCN-LABEL: func_areg_32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: ;;#ASMSTART
|
|
; GCN-NEXT: ; use agpr31
|
|
; GCN-NEXT: ;;#ASMEND
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
call void asm sideeffect "; use agpr31", "~{a31}" ()
|
|
ret void
|
|
}
|
|
|
|
; Callee whose only effect is an inline asm that lists a32 as clobbered
; ("~{a32}"). Expected output differs per target:
;   - gfx90a checks expect a32 to be copied into v0 before the asm and
;     written back from v0 after it;
;   - gfx908 checks expect no copy around the asm.
; NOTE(review): the copy on gfx90a presumably reflects a32 being preserved
; across calls on that target - confirm against the calling convention.
define void @func_areg_33() #0 {
|
|
; GFX90A-LABEL: func_areg_33:
|
|
; GFX90A: ; %bb.0:
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; use agpr32
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
|
|
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX908-LABEL: func_areg_33:
|
|
; GFX908: ; %bb.0:
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use agpr32
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
|
call void asm sideeffect "; use agpr32", "~{a32}" ()
|
|
ret void
|
|
}
|
|
|
|
|
|
; Callee whose only effect is an inline asm that lists a63 as clobbered
; ("~{a63}"). Expected output differs per target:
;   - gfx90a checks expect a63 to be copied into v0 before the asm and
;     written back from v0 after it;
;   - gfx908 checks expect no copy around the asm.
; NOTE(review): the copy on gfx90a presumably reflects a63 being preserved
; across calls on that target - confirm against the calling convention.
define void @func_areg_64() #0 {
|
|
; GFX90A-LABEL: func_areg_64:
|
|
; GFX90A: ; %bb.0:
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; use agpr63
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
|
|
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX908-LABEL: func_areg_64:
|
|
; GFX908: ; %bb.0:
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use agpr63
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
|
call void asm sideeffect "; use agpr63", "~{a63}" ()
|
|
ret void
|
|
}
|
|
|
|
; Callee whose only effect is an inline asm that lists both a31 and a63 as
; clobbered ("~{a31},~{a63}"). Expected output differs per target:
;   - gfx90a checks expect only a63 to be copied into v0 before the asm and
;     written back after it; a31 gets no such copy;
;   - gfx908 checks expect no copy around the asm for either register.
define void @func_areg_31_63() #0 {
|
|
; GFX90A-LABEL: func_areg_31_63:
|
|
; GFX90A: ; %bb.0:
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; use agpr31, agpr63
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
|
|
; GFX90A-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX908-LABEL: func_areg_31_63:
|
|
; GFX908: ; %bb.0:
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use agpr31, agpr63
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
|
call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
|
|
ret void
|
|
}
|
|
|
|
; Declaration only: this function has no body in the visible part of the file.
; NOTE(review): presumably the callee of @test_call_unknown - confirm.
declare void @func_unknown() #0
|
|
|
|
; Kernel that defines a <32 x float> in AGPRs through inline asm ("=a" output
; constraint), calls @func_empty, then volatile-stores the value to a poison
; addrspace(1) pointer, so the value is live across the call.
; Expected output per the checks below:
;   - gfx90a checks expect the value in a[0:31] and the stores after the call
;     to read those AGPRs directly;
;   - gfx908 checks expect the value in a[0:31] to be read into v3-v30 and
;     v32-v35 before the call, with the stores using those VGPRs.
define amdgpu_kernel void @test_call_empty() #0 {
|
|
; GFX90A-LABEL: test_call_empty:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s22, -1
|
|
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[0:31]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_empty:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s22, -1
|
|
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX908-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
call void @func_empty()
|
|
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Kernel that defines a <32 x float> in AGPRs through inline asm ("=a" output
; constraint), calls @func_areg_4 (whose asm clobbers a3), then
; volatile-stores the value, so the value is live across the call.
; Expected output per the checks below:
;   - gfx90a checks expect the value in a[4:35] and the stores after the call
;     to read those AGPRs directly;
;   - gfx908 checks expect the value in a[0:31] to be read into v3-v30 and
;     v32-v35 before the call, with the stores using those VGPRs.
; NOTE(review): a[4:35] presumably keeps the live value clear of the callee's
; a3 clobber - confirm.
define amdgpu_kernel void @test_call_areg4() #0 {
|
|
; GFX90A-LABEL: test_call_areg4:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s22, -1
|
|
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[4:35]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_areg4:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s22, -1
|
|
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX908-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
call void @func_areg_4()
|
|
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Kernel that defines a <32 x float> in AGPRs through inline asm ("=a" output
; constraint), calls @func_areg_32 (whose asm clobbers a31), then
; volatile-stores the value, so the value is live across the call.
; Expected output per the checks below:
;   - gfx90a checks expect the value in a[32:63] and the stores after the
;     call to read those AGPRs directly;
;   - gfx908 checks expect the value in a[0:31] to be read into v3-v30 and
;     v32-v35 before the call, with the stores using those VGPRs.
; NOTE(review): a[32:63] presumably keeps the live value clear of the callee's
; a31 clobber - confirm.
define amdgpu_kernel void @test_call_areg32() #0 {
|
|
; GFX90A-LABEL: test_call_areg32:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s22, -1
|
|
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[32:63]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_areg32:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s22, -1
|
|
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX908-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
call void @func_areg_32()
|
|
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Kernel that defines a <32 x float> in AGPRs through inline asm ("=a" output
; constraint), calls @func_areg_64 (whose asm clobbers a63), then
; volatile-stores the value, so the value is live across the call.
; Expected output per the checks below:
;   - gfx90a checks expect the value in a[0:31] and the stores after the call
;     to read those AGPRs directly;
;   - gfx908 checks expect the value in a[0:31] to be read into v3-v30 and
;     v32-v35 before the call, with the stores using those VGPRs.
define amdgpu_kernel void @test_call_areg64() #0 {
|
|
; GFX90A-LABEL: test_call_areg64:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s22, -1
|
|
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[0:31]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_areg64:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s22, -1
|
|
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX908-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
call void @func_areg_64()
|
|
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Kernel that defines a <32 x float> in AGPRs through inline asm ("=a" output
; constraint), calls @func_areg_31_63 (whose asm clobbers a31 and a63), then
; volatile-stores the value, so the value is live across the call.
; Expected output per the checks below:
;   - gfx90a checks expect the value in a[32:63] and the stores after the
;     call to read those AGPRs directly;
;   - gfx908 checks expect the value in a[0:31] to be read into v3-v30 and
;     v32-v35 before the call, with the stores using those VGPRs.
; NOTE(review): a[32:63] presumably keeps the live value clear of the callee's
; a31 clobber - confirm.
define amdgpu_kernel void @test_call_areg31_63() #0 {
|
|
; GFX90A-LABEL: test_call_areg31_63:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s22, -1
|
|
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[32:63]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_areg31_63:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s22, -1
|
|
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s20, s20, s11
|
|
; GFX908-NEXT: s_addc_u32 s21, s21, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
call void @func_areg_31_63()
|
|
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Kernel that defines a <32 x float> value in AGPRs through inline asm (the
; "=a" constraint), calls @func_unknown, and then stores the value with a
; volatile store, so the value is live across the call.
;
; What the autogenerated checks below pin down
;  - both targets load the callee address through func_unknown@gotpcrel32 and
;    call it with s_swappc_b64.
;  - gfx90a - the asm defines a[32:63] and the stores after the call read
;    a[32:63] directly; no copies are emitted around the call.
;  - gfx908 - the asm defines a[0:31]; every AGPR is copied into a VGPR with
;    v_accvgpr_read_b32 (v40-v47, v56-v63, v72-v79, v88-v95) before the call,
;    and the stores after the call read those VGPRs.
; NOTE(review) - presumably these register choices follow from which registers
; the callee is allowed to clobber on each target; the declaration of
; @func_unknown is outside this excerpt, so confirm there.
define amdgpu_kernel void @test_call_unknown() #0 {
|
|
; GFX90A-LABEL: test_call_unknown:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
|
; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
|
; GFX90A-NEXT: s_mov_b32 s38, -1
|
|
; GFX90A-NEXT: s_mov_b32 s39, 0xe00000
|
|
; GFX90A-NEXT: s_add_u32 s36, s36, s11
|
|
; GFX90A-NEXT: s_addc_u32 s37, s37, 0
|
|
; GFX90A-NEXT: s_mov_b32 s12, s8
|
|
; GFX90A-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX90A-NEXT: s_mov_b32 s13, s9
|
|
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX90A-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX90A-NEXT: s_mov_b32 s14, s10
|
|
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37]
|
|
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
|
|
; GFX90A-NEXT: s_mov_b32 s32, 0
|
|
; GFX90A-NEXT: ;;#ASMSTART
|
|
; GFX90A-NEXT: ; def a[32:63]
|
|
; GFX90A-NEXT: ;;#ASMEND
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX90A-NEXT: s_endpgm
|
|
;
|
|
; GFX908-LABEL: test_call_unknown:
|
|
; GFX908: ; %bb.0: ; %bb
|
|
; GFX908-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
|
|
; GFX908-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
|
|
; GFX908-NEXT: s_mov_b32 s38, -1
|
|
; GFX908-NEXT: s_mov_b32 s39, 0xe00000
|
|
; GFX908-NEXT: s_add_u32 s36, s36, s11
|
|
; GFX908-NEXT: s_addc_u32 s37, s37, 0
|
|
; GFX908-NEXT: s_mov_b32 s12, s8
|
|
; GFX908-NEXT: s_add_u32 s8, s4, 36
|
|
; GFX908-NEXT: s_mov_b32 s13, s9
|
|
; GFX908-NEXT: s_addc_u32 s9, s5, 0
|
|
; GFX908-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX908-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
|
|
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
|
|
; GFX908-NEXT: s_mov_b32 s14, s10
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
|
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX908-NEXT: s_mov_b64 s[0:1], s[36:37]
|
|
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
|
|
; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39]
|
|
; GFX908-NEXT: s_mov_b32 s32, 0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def a[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v43, a3
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v42, a2
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v41, a1
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v40, a0
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v47, a7
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v46, a6
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v45, a5
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v44, a4
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v59, a11
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v58, a10
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v57, a9
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v56, a8
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v63, a15
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v62, a14
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v61, a13
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v60, a12
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v75, a19
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v74, a18
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v73, a17
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v72, a16
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v79, a23
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v78, a22
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v77, a21
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v76, a20
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v91, a27
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v90, a26
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v89, a25
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v88, a24
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v95, a31
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v94, a30
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v93, a29
|
|
; GFX908-NEXT: v_accvgpr_read_b32 v92, a28
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[92:95], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[88:91], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[76:79], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[72:75], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[60:63], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[56:59], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[44:47], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[40:43], off
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_endpgm
|
|
bb:
|
|
; Define the 32-element value in AGPRs; the asm string only prints the
; register range that was allocated for $0 (the "def a[...]" line in the checks).
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
|
|
; The call that %reg has to stay live across.
call void @func_unknown()
|
|
; Use %reg after the call; the store is volatile and targets a poison pointer.
store volatile <32 x float> %reg, ptr addrspace(1) poison
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by the `#0` on the function definitions in
; this file. It marks them nounwind and noinline and sets the string attribute
; "amdgpu-flat-work-group-size" to "1,512" (presumably the minimum,maximum flat
; work-group size - confirm against the AMDGPU usage guide).
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
|