Shilei Tian fc0653f31c
[RFC][NFC][AMDGPU] Remove -verify-machineinstrs from llvm/test/CodeGen/AMDGPU/*.ll (#150024)
Recent upstream trends have moved away from explicitly using `-verify-machineinstrs`, as it's already covered by the expensive checks. This PR removes almost all `-verify-machineinstrs` from tests in `llvm/test/CodeGen/AMDGPU/*.ll`, leaving only those tests where its removal currently causes failures.
2025-07-23 13:42:46 -04:00

917 lines
37 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; Callee whose IR body is a lone `ret void`. The checks below expect nothing
; but the entry wait and the return jump, identically on both RUN targets.
; Used as the call target of @test_call_empty.
define void @func_empty() #0 {
; GCN-LABEL: func_empty:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
; Callee whose only instruction is an inline asm that lists a3 as clobbered.
; The checks expect no save/restore code around the asm on either RUN target.
; Used as the call target of @test_call_areg4.
define void @func_areg_4() #0 {
; GCN-LABEL: func_areg_4:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use agpr3
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
; The asm text is only a comment; the "~{a3}" clobber is what marks a3 as used.
call void asm sideeffect "; use agpr3", "~{a3}" ()
ret void
}
; Callee whose only instruction is an inline asm that lists a31 as clobbered.
; As with a3 in @func_areg_4, the checks expect no save/restore code on either
; RUN target. Used as the call target of @test_call_areg32.
define void @func_areg_32() #0 {
; GCN-LABEL: func_areg_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use agpr31
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
; The asm text is only a comment; the "~{a31}" clobber is what marks a31 as used.
call void asm sideeffect "; use agpr31", "~{a31}" ()
ret void
}
; Callee whose only instruction is an inline asm that lists a32 as clobbered.
; This is the first register index at which the two RUN targets diverge:
;   - gfx90a checks expect a32 to be copied into v0 before the asm and written
;     back from v0 afterwards;
;   - gfx908 checks expect no copy at all.
; NOTE(review): presumably a32 is treated as callee-saved on gfx90a only --
; confirm against the AMDGPU calling convention; it is not visible in this file.
define void @func_areg_33() #0 {
; GFX90A-LABEL: func_areg_33:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use agpr32
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_33:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use agpr32
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: s_setpc_b64 s[30:31]
; The asm text is only a comment; the "~{a32}" clobber is what marks a32 as used.
call void asm sideeffect "; use agpr32", "~{a32}" ()
ret void
}
; Callee whose only instruction is an inline asm that lists a63 as clobbered.
; Same pattern as @func_areg_33: the gfx90a checks expect a63 to be preserved
; through v0 around the asm, while the gfx908 checks expect no copy.
; Used as the call target of @test_call_areg64.
define void @func_areg_64() #0 {
; GFX90A-LABEL: func_areg_64:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use agpr63
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_64:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use agpr63
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: s_setpc_b64 s[30:31]
; The asm text is only a comment; the "~{a63}" clobber is what marks a63 as used.
call void asm sideeffect "; use agpr63", "~{a63}" ()
ret void
}
; Callee whose only instruction is an inline asm that lists both a31 and a63
; as clobbered. The gfx90a checks expect only a63 to be preserved through v0
; (a31 gets no save/restore, matching @func_areg_32); the gfx908 checks expect
; no copies for either register. Used as the call target of
; @test_call_areg31_63.
define void @func_areg_31_63() #0 {
; GFX90A-LABEL: func_areg_31_63:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use agpr31, agpr63
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_31_63:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use agpr31, agpr63
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: s_setpc_b64 s[30:31]
; The asm text is only a comment; the two clobber constraints mark a31 and a63 as used.
call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
ret void
}
; External function: only a declaration is given, so this module contains no
; body for it. It is the symbol loaded through the GOT in the checks of
; @test_call_unknown.
declare void @func_unknown() #0
; Kernel that defines a <32 x float> through inline asm with the "=a"
; constraint, calls @func_empty, and then volatile-stores the value, so the
; value is live across the call.
; Expected gfx90a output: the value is defined in a[0:31] and stored directly
; from those registers after the call, with no copies around the call.
; Expected gfx908 output: the value is defined in a[0:31], every element is
; copied out with v_accvgpr_read_b32 before the call, and the stores use the
; copies (v3-v30 and v32-v35; v31 is written separately before the call).
define amdgpu_kernel void @test_call_empty() #0 {
; GFX90A-LABEL: test_call_empty:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s22, -1
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
; GFX90A-NEXT: s_add_u32 s20, s20, s11
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:31]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_empty:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s22, -1
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
; GFX908-NEXT: s_add_u32 s20, s20, s11
; GFX908-NEXT: s_addc_u32 s21, s21, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_empty()
; Using %reg after the call is what forces it to survive the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Kernel that defines a <32 x float> through inline asm with the "=a"
; constraint, calls @func_areg_4 (which clobbers a3), and then volatile-stores
; the value, so the value is live across the call.
; Expected gfx90a output: the value is defined in a[4:35], i.e. a range that
; starts above the clobbered a3, and is stored directly after the call.
; NOTE(review): presumably the allocator is using the callee's known clobbers
; to pick this range -- confirm which option/attribute enables that; it is not
; visible in the RUN lines.
; Expected gfx908 output: the value is defined in a[0:31] and copied out with
; v_accvgpr_read_b32 before the call; the stores use the copies.
define amdgpu_kernel void @test_call_areg4() #0 {
; GFX90A-LABEL: test_call_areg4:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s22, -1
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
; GFX90A-NEXT: s_add_u32 s20, s20, s11
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[4:35]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_areg4:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s22, -1
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
; GFX908-NEXT: s_add_u32 s20, s20, s11
; GFX908-NEXT: s_addc_u32 s21, s21, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_4()
; Using %reg after the call is what forces it to survive the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Kernel that defines a <32 x float> through inline asm with the "=a"
; constraint, calls @func_areg_32 (which clobbers a31), and then volatile-stores
; the value, so the value is live across the call.
; Expected gfx90a output: the value is defined in a[32:63], i.e. entirely above
; the clobbered a31, and is stored directly after the call.
; NOTE(review): presumably the allocator is using the callee's known clobbers
; to pick this range -- confirm which option/attribute enables that; it is not
; visible in the RUN lines.
; Expected gfx908 output: the value is defined in a[0:31] and copied out with
; v_accvgpr_read_b32 before the call; the stores use the copies.
define amdgpu_kernel void @test_call_areg32() #0 {
; GFX90A-LABEL: test_call_areg32:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s22, -1
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
; GFX90A-NEXT: s_add_u32 s20, s20, s11
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[32:63]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_areg32:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s22, -1
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
; GFX908-NEXT: s_add_u32 s20, s20, s11
; GFX908-NEXT: s_addc_u32 s21, s21, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_32()
; Using %reg after the call is what forces it to survive the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Kernel that defines a <32 x float> through inline asm with the "=a"
; constraint, calls @func_areg_64 (which clobbers a63), and then volatile-stores
; the value, so the value is live across the call.
; Expected gfx90a output: the value is defined in a[0:31] and stored directly
; after the call, with no copies around the call.
; Expected gfx908 output: the value is defined in a[0:31] and copied out with
; v_accvgpr_read_b32 before the call; the stores use the copies.
define amdgpu_kernel void @test_call_areg64() #0 {
; GFX90A-LABEL: test_call_areg64:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s22, -1
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
; GFX90A-NEXT: s_add_u32 s20, s20, s11
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:31]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_areg64:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s22, -1
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
; GFX908-NEXT: s_add_u32 s20, s20, s11
; GFX908-NEXT: s_addc_u32 s21, s21, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_64()
; Using %reg after the call is what forces it to survive the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Kernel that defines a <32 x float> through inline asm with the "=a"
; constraint, calls @func_areg_31_63 (which clobbers a31 and a63), and then
; volatile-stores the value, so the value is live across the call.
; Expected gfx90a output: the value is defined in a[32:63] and stored directly
; after the call. That range avoids a31 but includes a63, which the gfx90a
; checks of @func_areg_31_63 show being saved and restored inside the callee.
; Expected gfx908 output: the value is defined in a[0:31] and copied out with
; v_accvgpr_read_b32 before the call; the stores use the copies.
define amdgpu_kernel void @test_call_areg31_63() #0 {
; GFX90A-LABEL: test_call_areg31_63:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s22, -1
; GFX90A-NEXT: s_mov_b32 s23, 0xe00000
; GFX90A-NEXT: s_add_u32 s20, s20, s11
; GFX90A-NEXT: s_addc_u32 s21, s21, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[32:63]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_areg31_63:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s22, -1
; GFX908-NEXT: s_mov_b32 s23, 0xe00000
; GFX908-NEXT: s_add_u32 s20, s20, s11
; GFX908-NEXT: s_addc_u32 s21, s21, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v6, a3
; GFX908-NEXT: v_accvgpr_read_b32 v5, a2
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a0
; GFX908-NEXT: v_accvgpr_read_b32 v10, a7
; GFX908-NEXT: v_accvgpr_read_b32 v9, a6
; GFX908-NEXT: v_accvgpr_read_b32 v8, a5
; GFX908-NEXT: v_accvgpr_read_b32 v7, a4
; GFX908-NEXT: v_accvgpr_read_b32 v14, a11
; GFX908-NEXT: v_accvgpr_read_b32 v13, a10
; GFX908-NEXT: v_accvgpr_read_b32 v12, a9
; GFX908-NEXT: v_accvgpr_read_b32 v11, a8
; GFX908-NEXT: v_accvgpr_read_b32 v18, a15
; GFX908-NEXT: v_accvgpr_read_b32 v17, a14
; GFX908-NEXT: v_accvgpr_read_b32 v16, a13
; GFX908-NEXT: v_accvgpr_read_b32 v15, a12
; GFX908-NEXT: v_accvgpr_read_b32 v22, a19
; GFX908-NEXT: v_accvgpr_read_b32 v21, a18
; GFX908-NEXT: v_accvgpr_read_b32 v20, a17
; GFX908-NEXT: v_accvgpr_read_b32 v19, a16
; GFX908-NEXT: v_accvgpr_read_b32 v26, a23
; GFX908-NEXT: v_accvgpr_read_b32 v25, a22
; GFX908-NEXT: v_accvgpr_read_b32 v24, a21
; GFX908-NEXT: v_accvgpr_read_b32 v23, a20
; GFX908-NEXT: v_accvgpr_read_b32 v30, a27
; GFX908-NEXT: v_accvgpr_read_b32 v29, a26
; GFX908-NEXT: v_accvgpr_read_b32 v28, a25
; GFX908-NEXT: v_accvgpr_read_b32 v27, a24
; GFX908-NEXT: v_accvgpr_read_b32 v35, a31
; GFX908-NEXT: v_accvgpr_read_b32 v34, a30
; GFX908-NEXT: v_accvgpr_read_b32 v33, a29
; GFX908-NEXT: v_accvgpr_read_b32 v32, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_31_63()
; Using %reg after the call is what forces it to survive the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Kernel that defines a <32 x float> value through inline asm using the "=a"
; constraint, calls @func_unknown, and then stores the value with a volatile
; store, so the value has to stay live across the call.
;
; Expected output for the gfx90a run: the asm defines a[32:63], the value stays
; in those AGPRs across the call, and the stores read directly from them.
; Expected output for the gfx908 run: the asm defines a[0:31], every element is
; copied into a VGPR with v_accvgpr_read_b32 before the call (v40-v47, v56-v63,
; v72-v79, v88-v95), and the stores read from those VGPRs afterwards.
; Presumably the difference comes from which registers each subtarget may
; assume an unknown callee preserves; confirm against the AMDGPU calling
; convention documentation.
define amdgpu_kernel void @test_call_unknown() #0 {
; GFX90A-LABEL: test_call_unknown:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT: s_mov_b32 s38, -1
; GFX90A-NEXT: s_mov_b32 s39, 0xe00000
; GFX90A-NEXT: s_add_u32 s36, s36, s11
; GFX90A-NEXT: s_addc_u32 s37, s37, 0
; GFX90A-NEXT: s_mov_b32 s12, s8
; GFX90A-NEXT: s_add_u32 s8, s4, 36
; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: s_addc_u32 s9, s5, 0
; GFX90A-NEXT: s_getpc_b64 s[4:5]
; GFX90A-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT: s_mov_b32 s14, s10
; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[32:63]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_endpgm
;
; GFX908-LABEL: test_call_unknown:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX908-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GFX908-NEXT: s_mov_b32 s38, -1
; GFX908-NEXT: s_mov_b32 s39, 0xe00000
; GFX908-NEXT: s_add_u32 s36, s36, s11
; GFX908-NEXT: s_addc_u32 s37, s37, 0
; GFX908-NEXT: s_mov_b32 s12, s8
; GFX908-NEXT: s_add_u32 s8, s4, 36
; GFX908-NEXT: s_mov_b32 s13, s9
; GFX908-NEXT: s_addc_u32 s9, s5, 0
; GFX908-NEXT: s_getpc_b64 s[4:5]
; GFX908-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT: s_mov_b32 s14, s10
; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def a[0:31]
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v43, a3
; GFX908-NEXT: v_accvgpr_read_b32 v42, a2
; GFX908-NEXT: v_accvgpr_read_b32 v41, a1
; GFX908-NEXT: v_accvgpr_read_b32 v40, a0
; GFX908-NEXT: v_accvgpr_read_b32 v47, a7
; GFX908-NEXT: v_accvgpr_read_b32 v46, a6
; GFX908-NEXT: v_accvgpr_read_b32 v45, a5
; GFX908-NEXT: v_accvgpr_read_b32 v44, a4
; GFX908-NEXT: v_accvgpr_read_b32 v59, a11
; GFX908-NEXT: v_accvgpr_read_b32 v58, a10
; GFX908-NEXT: v_accvgpr_read_b32 v57, a9
; GFX908-NEXT: v_accvgpr_read_b32 v56, a8
; GFX908-NEXT: v_accvgpr_read_b32 v63, a15
; GFX908-NEXT: v_accvgpr_read_b32 v62, a14
; GFX908-NEXT: v_accvgpr_read_b32 v61, a13
; GFX908-NEXT: v_accvgpr_read_b32 v60, a12
; GFX908-NEXT: v_accvgpr_read_b32 v75, a19
; GFX908-NEXT: v_accvgpr_read_b32 v74, a18
; GFX908-NEXT: v_accvgpr_read_b32 v73, a17
; GFX908-NEXT: v_accvgpr_read_b32 v72, a16
; GFX908-NEXT: v_accvgpr_read_b32 v79, a23
; GFX908-NEXT: v_accvgpr_read_b32 v78, a22
; GFX908-NEXT: v_accvgpr_read_b32 v77, a21
; GFX908-NEXT: v_accvgpr_read_b32 v76, a20
; GFX908-NEXT: v_accvgpr_read_b32 v91, a27
; GFX908-NEXT: v_accvgpr_read_b32 v90, a26
; GFX908-NEXT: v_accvgpr_read_b32 v89, a25
; GFX908-NEXT: v_accvgpr_read_b32 v88, a24
; GFX908-NEXT: v_accvgpr_read_b32 v95, a31
; GFX908-NEXT: v_accvgpr_read_b32 v94, a30
; GFX908-NEXT: v_accvgpr_read_b32 v93, a29
; GFX908-NEXT: v_accvgpr_read_b32 v92, a28
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[92:95], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[88:91], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[76:79], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[72:75], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[60:63], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[56:59], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[44:47], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: global_store_dwordx4 v[0:1], v[40:43], off
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_endpgm
bb:
; Produce a 32-element float vector; the "=a" constraint asks for an AGPR tuple.
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
; Call made while %reg is still live.
call void @func_unknown()
; The volatile store uses %reg after the call, keeping it live across the call.
store volatile <32 x float> %reg, ptr addrspace(1) poison
ret void
}
; Attribute group referenced as `#0` by the function definitions in this file:
; nounwind, noinline, and "amdgpu-flat-work-group-size"="1,512" (presumably the
; minimum and maximum flat work-group size; confirm in the AMDGPU usage guide).
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }