276 lines
12 KiB
LLVM
276 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
|
|
|
|
; Three named-barrier objects in addrspace(3). Each has internal linkage, the
; target type "amdgcn.named.barrier" with parameter 0, and a poison initializer.
; @bar is used by kernel1 and kernel2, @bar2 by func2, and @bar3 by func1.
@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
|
|
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
|
|
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
|
|
|
|
; func1: non-kernel function that signals, joins and waits on the named
; barrier @bar3.
; In the expected output the i32 7 passed to signal.var appears in the upper
; half of the m0 constant (0x70003) and the low bits hold 3, the same value
; that is used for the join (presumably the ID assigned to @bar3 - confirm).
; The SelectionDAG output routes the join operand through m0, while the
; GlobalISel output uses the immediate form of s_barrier_join.
define void @func1() {
|
|
; GFX12-SDAG-LABEL: func1:
|
|
; GFX12-SDAG: ; %bb.0:
|
|
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70003
|
|
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-SDAG-NEXT: s_barrier_signal m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 3
|
|
; GFX12-SDAG-NEXT: s_barrier_join m0
|
|
; GFX12-SDAG-NEXT: s_barrier_wait 1
|
|
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-GISEL-LABEL: func1:
|
|
; GFX12-GISEL: ; %bb.0:
|
|
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70003
|
|
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-GISEL-NEXT: s_barrier_signal m0
|
|
; GFX12-GISEL-NEXT: s_barrier_join 3
|
|
; GFX12-GISEL-NEXT: s_barrier_wait 1
|
|
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
|
|
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
|
|
call void @llvm.amdgcn.s.barrier.wait(i16 1)
|
|
ret void
|
|
}
|
|
|
|
; func2: non-kernel function that signals, joins and waits on the named
; barrier @bar2. It is called from both kernel1 and kernel2.
; In the expected output the i32 7 passed to signal.var appears in the upper
; half of the m0 constant (0x70001) and the low bits hold 1, the same value
; that is used for the join (presumably the ID assigned to @bar2 - confirm).
; The SelectionDAG output routes the join operand through m0, while the
; GlobalISel output uses the immediate form of s_barrier_join.
define void @func2() {
|
|
; GFX12-SDAG-LABEL: func2:
|
|
; GFX12-SDAG: ; %bb.0:
|
|
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70001
|
|
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-SDAG-NEXT: s_barrier_signal m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 1
|
|
; GFX12-SDAG-NEXT: s_barrier_join m0
|
|
; GFX12-SDAG-NEXT: s_barrier_wait 1
|
|
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-GISEL-LABEL: func2:
|
|
; GFX12-GISEL: ; %bb.0:
|
|
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70001
|
|
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
|
|
; GFX12-GISEL-NEXT: s_barrier_signal m0
|
|
; GFX12-GISEL-NEXT: s_barrier_join 1
|
|
; GFX12-GISEL-NEXT: s_barrier_wait 1
|
|
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
|
|
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
|
|
call void @llvm.amdgcn.s.barrier.wait(i16 1)
|
|
ret void
|
|
}
|
|
|
|
; kernel1: kernel entry point that exercises every barrier intrinsic declared
; in this file, using both the global named barrier @bar and the barrier
; pointer argument %in, and then calls func1 and func2.
; For @bar the expected m0 constant is 0xc0002: the i32 12 operand sits in the
; upper half and the low bits hold 2 (presumably the ID assigned to @bar -
; confirm).
; For %in the expected code takes a loaded 32-bit value, shifts it right by 4,
; masks it with 63 and ORs in 0x90000 for the i32 9 operand.
; %out and the results %isfirst, %state, %state2 and %state3 are never used.
define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
|
|
; GFX12-SDAG-LABEL: kernel1:
|
|
; GFX12-SDAG: ; %bb.0:
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX12-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 0xc0002
|
|
; GFX12-SDAG-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX12-SDAG-NEXT: s_barrier_init m0
|
|
; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_lshr_b32 s2, s2, 4
|
|
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-SDAG-NEXT: s_and_b32 s2, s2, 63
|
|
; GFX12-SDAG-NEXT: s_or_b32 s3, 0x90000, s2
|
|
; GFX12-SDAG-NEXT: s_cmp_eq_u32 0, 0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, s3
|
|
; GFX12-SDAG-NEXT: s_barrier_init m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 0xc0002
|
|
; GFX12-SDAG-NEXT: s_barrier_signal m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, s3
|
|
; GFX12-SDAG-NEXT: s_barrier_signal m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
|
|
; GFX12-SDAG-NEXT: s_barrier_signal -1
|
|
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
|
|
; GFX12-SDAG-NEXT: s_barrier_join m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
|
|
; GFX12-SDAG-NEXT: s_barrier_wait 1
|
|
; GFX12-SDAG-NEXT: s_barrier_leave
|
|
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
|
|
; GFX12-SDAG-NEXT: s_get_barrier_state s2, m0
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_getpc_b64 s[2:3]
|
|
; GFX12-SDAG-NEXT: s_sext_i32_i16 s3, s3
|
|
; GFX12-SDAG-NEXT: s_add_co_u32 s2, s2, func1@gotpcrel32@lo+8
|
|
; GFX12-SDAG-NEXT: s_add_co_ci_u32 s3, s3, func1@gotpcrel32@hi+16
|
|
; GFX12-SDAG-NEXT: s_barrier_signal -1
|
|
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
|
; GFX12-SDAG-NEXT: s_barrier_wait -1
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
|
; GFX12-SDAG-NEXT: s_getpc_b64 s[2:3]
|
|
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-SDAG-NEXT: s_sext_i32_i16 s3, s3
|
|
; GFX12-SDAG-NEXT: s_add_co_u32 s2, s2, func2@gotpcrel32@lo+12
|
|
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-SDAG-NEXT: s_add_co_ci_u32 s3, s3, func2@gotpcrel32@hi+24
|
|
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
|
; GFX12-SDAG-NEXT: s_get_barrier_state s0, -1
|
|
; GFX12-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX12-GISEL-LABEL: kernel1:
|
|
; GFX12-GISEL: ; %bb.0:
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[12:13], s[4:5]
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX12-GISEL-NEXT: s_load_b32 s0, s[12:13], 0x2c
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, 0xc0002
|
|
; GFX12-GISEL-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX12-GISEL-NEXT: s_barrier_init m0
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_lshr_b32 s0, s0, 4
|
|
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-GISEL-NEXT: s_and_b32 s0, s0, 63
|
|
; GFX12-GISEL-NEXT: s_or_b32 s1, s0, 0x90000
|
|
; GFX12-GISEL-NEXT: s_cmp_eq_u32 0, 0
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
|
|
; GFX12-GISEL-NEXT: s_barrier_init m0
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, 0xc0002
|
|
; GFX12-GISEL-NEXT: s_barrier_signal m0
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
|
|
; GFX12-GISEL-NEXT: s_barrier_signal m0
|
|
; GFX12-GISEL-NEXT: s_barrier_signal -1
|
|
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
|
|
; GFX12-GISEL-NEXT: s_barrier_join m0
|
|
; GFX12-GISEL-NEXT: s_barrier_wait 1
|
|
; GFX12-GISEL-NEXT: s_barrier_leave
|
|
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s13, 0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_getpc_b64 s[0:1]
|
|
; GFX12-GISEL-NEXT: s_sext_i32_i16 s1, s1
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, func1@gotpcrel32@lo+8
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, func1@gotpcrel32@hi+16
|
|
; GFX12-GISEL-NEXT: s_barrier_signal -1
|
|
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
; GFX12-GISEL-NEXT: s_barrier_wait -1
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s13, 0
|
|
; GFX12-GISEL-NEXT: s_getpc_b64 s[0:1]
|
|
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-GISEL-NEXT: s_sext_i32_i16 s1, s1
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, func2@gotpcrel32@lo+12
|
|
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, func2@gotpcrel32@hi+24
|
|
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
; GFX12-GISEL-NEXT: s_get_barrier_state s0, -1
|
|
; GFX12-GISEL-NEXT: s_endpgm
|
|
; Initialize and then signal both barriers: @bar with i32 12, %in with i32 9.
call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) @bar, i32 12)
|
|
call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %in, i32 9)
|
|
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 12)
|
|
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %in, i32 9)
|
|
; Signal barrier -1 twice: the plain form, then the isfirst form.
call void @llvm.amdgcn.s.barrier.signal(i32 -1)
|
|
%isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
|
|
; Join the barrier addressed by %in, wait on barrier 1, then leave.
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
|
|
call void @llvm.amdgcn.s.barrier.wait(i16 1)
|
|
call void @llvm.amdgcn.s.barrier.leave(i16 1)
|
|
; Query the state of both named barriers; the results are unused.
%state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
|
|
%state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
|
|
; Plain barrier followed by calls to the two helper functions.
call void @llvm.amdgcn.s.barrier()
|
|
call void @func1()
|
|
call void @func2()
|
|
; Final state query on barrier -1; the result is unused.
%state3 = call i32 @llvm.amdgcn.s.get.barrier.state(i32 -1)
|
|
ret void
|
|
}
|
|
|
|
; kernel2: kernel entry point that signals, joins and waits on the named
; barrier @bar and then calls func2.
; The expected m0 constant is 0x70002: the i32 7 operand sits in the upper half
; and the low bits hold 2, the same value that is used for the join.
; The SelectionDAG output routes the join operand through m0, while the
; GlobalISel output uses the immediate form of s_barrier_join.
; Neither %out nor %in is used.
define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
|
|
; GFX12-SDAG-LABEL: kernel2:
|
|
; GFX12-SDAG: ; %bb.0:
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX12-SDAG-NEXT: s_getpc_b64 s[6:7]
|
|
; GFX12-SDAG-NEXT: s_sext_i32_i16 s7, s7
|
|
; GFX12-SDAG-NEXT: s_add_co_u32 s6, s6, func2@gotpcrel32@lo+8
|
|
; GFX12-SDAG-NEXT: s_add_co_ci_u32 s7, s7, func2@gotpcrel32@hi+16
|
|
; GFX12-SDAG-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX12-SDAG-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70002
|
|
; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
|
|
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-SDAG-NEXT: s_barrier_signal m0
|
|
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
|
|
; GFX12-SDAG-NEXT: s_barrier_join m0
|
|
; GFX12-SDAG-NEXT: s_barrier_wait 1
|
|
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13]
|
|
; GFX12-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX12-GISEL-LABEL: kernel2:
|
|
; GFX12-GISEL: ; %bb.0:
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s8, s4, 48
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s5, 0
|
|
; GFX12-GISEL-NEXT: s_getpc_b64 s[4:5]
|
|
; GFX12-GISEL-NEXT: s_sext_i32_i16 s5, s5
|
|
; GFX12-GISEL-NEXT: s_add_co_u32 s4, s4, func2@gotpcrel32@lo+8
|
|
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s5, s5, func2@gotpcrel32@hi+16
|
|
; GFX12-GISEL-NEXT: v_mov_b32_e32 v31, v0
|
|
; GFX12-GISEL-NEXT: s_load_b64 s[12:13], s[4:5], 0x0
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
|
|
; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70002
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
|
; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
|
|
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
|
|
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-GISEL-NEXT: s_barrier_signal m0
|
|
; GFX12-GISEL-NEXT: s_barrier_join 2
|
|
; GFX12-GISEL-NEXT: s_barrier_wait 1
|
|
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13]
|
|
; GFX12-GISEL-NEXT: s_endpgm
|
|
; Signal, join and wait on @bar before calling the helper.
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 7)
|
|
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar)
|
|
call void @llvm.amdgcn.s.barrier.wait(i16 1)
|
|
|
|
call void @func2()
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the barrier intrinsics called by the functions in this file.
; Every declaration carries attribute group #1.
declare void @llvm.amdgcn.s.barrier() #1
|
|
declare void @llvm.amdgcn.s.barrier.wait(i16) #1
|
|
declare void @llvm.amdgcn.s.barrier.signal(i32) #1
|
|
declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
|
|
declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
|
|
declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
|
|
declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
|
|
declare void @llvm.amdgcn.s.barrier.leave(i16) #1
|
|
declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
|
|
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1
|
|
|
|
; Attribute groups: #0 is applied to kernel1 and kernel2, #1 to the intrinsic
; declarations.
; NOTE(review): #2 is not referenced by anything in this file - confirm whether
; it can be dropped.
attributes #0 = { nounwind }
|
|
attributes #1 = { convergent nounwind }
|
|
attributes #2 = { nounwind readnone }
|