On targets that require even-aligned 64-bit VGPRs, GWS operands require even alignment of a 32-bit operand. Previously we had a hacky post-processing step that added an implicit operand to try to manage the constraint. That required special-casing in other passes to avoid breaking the operand constraint. This change moves the handling into the instruction definition, so other passes no longer need to consider this edge case. MC still needs to special-case this in order to print/parse the operand as a 32-bit register. This still ends up being less work overall than introducing even-aligned 32-bit register classes. The same approach should also be applied to the image special case.
270 lines
8.8 KiB
LLVM
270 lines
8.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=CHECK,SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=CHECK,GISEL %s
|
|
|
|
; ds.gws.init with the data value produced in an AGPR (inline asm "=a"
; constraint) and a constant offset of 0. The expected output uses a0 directly
; as the ds_gws_init operand, writes 0 to m0, and prints no offset modifier.
define void @gws_init_offset0() #0 {
|
|
; CHECK-LABEL: gws_init_offset0:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_mov_b32 m0, 0
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_init a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.init with an AGPR-defined data value and a constant offset of 63.
; The expected output keeps a0 as the operand, writes 0 to m0, and prints the
; constant as the offset:63 modifier on the instruction.
define void @gws_init_offset63() #0 {
|
|
; CHECK-LABEL: gws_init_offset63:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_mov_b32 m0, 0
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_init a0 offset:63 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 63)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.init with an AGPR-defined data value and a variable offset passed as
; an inreg argument. In the expected output the offset register (s16) is
; shifted left by 16 into m0 and the data operand remains a0.
define void @gws_init_sgpr_offset(i32 inreg %offset) #0 {
|
|
; CHECK-LABEL: gws_init_sgpr_offset:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_lshl_b32 m0, s16, 16
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_init a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
|
|
ret void
|
|
}
|
|
|
|
; Kernel variant where both the data value and the offset are produced in
; AGPRs by inline asm. The expected output copies the offset (a1) through
; v_accvgpr_read_b32 and v_readfirstlane_b32 into an SGPR, shifts it left by 16
; into m0, and still uses a0 directly as the ds_gws_init data operand.
define amdgpu_kernel void @gws_init_agpr_offset() #0 {
|
|
; CHECK-LABEL: gws_init_agpr_offset:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a1
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: v_accvgpr_read_b32 v0, a1
|
|
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_lshl_b32 m0, s0, 16
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_init a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_endpgm
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
%offset = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
|
|
ret void
|
|
}
|
|
|
|
; Same as the AGPR-offset case, but the offset is an AGPR-defined base plus the
; constant 1. The expected output moves only the base (a1) into m0 (via
; v_accvgpr_read_b32, v_readfirstlane_b32 and a shift left by 16) and prints
; the constant as offset:1; the data operand is a0.
define void @gws_init_agpr_offset_add1() #0 {
|
|
; CHECK-LABEL: gws_init_agpr_offset_add1:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a1
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: v_accvgpr_read_b32 v0, a1
|
|
; CHECK-NEXT: v_readfirstlane_b32 s4, v0
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_lshl_b32 m0, s4, 16
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_init a0 offset:1 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
%offset.base = call i32 asm "; def $0", "=a"()
|
|
%offset = add i32 %offset.base, 1
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
|
|
ret void
|
|
}
|
|
|
|
; Kernel variant where the data value is the kernel argument %val rather than
; an AGPR: the expected output loads it with s_load_dword and copies it into v0,
; which becomes the ds_gws_init operand. The offset is an AGPR-defined base
; plus 3; the base is moved into m0 (shifted left by 16) and the constant is
; printed as offset:3.
define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
|
|
; CHECK-LABEL: gws_init_vgpr_offset_add:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
|
|
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
|
|
; CHECK-NEXT: s_lshl_b32 m0, s1, 16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: v_mov_b32_e32 v0, s0
|
|
; CHECK-NEXT: ds_gws_init v0 offset:3 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_endpgm
|
|
%agpr.offset.base = call i32 asm "; def $0", "=a"()
|
|
%agpr.offset = add i32 %agpr.offset.base, 3
|
|
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %agpr.offset)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.barrier with an AGPR-defined data value and a constant offset of 0.
; The expected output uses a0 directly as the ds_gws_barrier operand, writes 0
; to m0, and prints no offset modifier.
define void @gws_barrier_offset0() #0 {
|
|
; CHECK-LABEL: gws_barrier_offset0:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_mov_b32 m0, 0
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_barrier a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.barrier with an AGPR-defined data value and a constant offset of 63.
; The expected output keeps a0 as the operand, writes 0 to m0, and prints the
; constant as the offset:63 modifier.
define void @gws_barrier_offset63() #0 {
|
|
; CHECK-LABEL: gws_barrier_offset63:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_mov_b32 m0, 0
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_barrier a0 offset:63 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 63)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.barrier with an AGPR-defined data value and a variable offset passed
; as an inreg argument. In the expected output the offset register (s16) is
; shifted left by 16 into m0 and the data operand remains a0.
define void @gws_barrier_sgpr_offset(i32 inreg %offset) #0 {
|
|
; CHECK-LABEL: gws_barrier_sgpr_offset:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_lshl_b32 m0, s16, 16
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_barrier a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.sema.v with a constant offset of 0. The intrinsic takes only the
; offset, so the AGPR-defined %val is never used. The two selectors differ:
; the expected SelectionDAG output contains no inline asm block, while the
; expected GlobalISel output still emits it after the ds_gws_sema_v instruction.
; NOTE(review): the unused inline asm looks like a carry-over from the tests
; that take a data operand - confirm whether it is intentional.
define void @gws_sema_v_offset0() #0 {
|
|
; SDAG-LABEL: gws_sema_v_offset0:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 m0, 0
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: ds_gws_sema_v gds
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: gws_sema_v_offset0:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 m0, 0
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: ds_gws_sema_v gds
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: ;;#ASMSTART
|
|
; GISEL-NEXT: ; def a0
|
|
; GISEL-NEXT: ;;#ASMEND
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.sema.v(i32 0)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.sema.br with an AGPR-defined data value and a constant offset of 0.
; The expected output uses a0 directly as the ds_gws_sema_br operand, writes 0
; to m0, and prints no offset modifier.
define void @gws_sema_br_offset0() #0 {
|
|
; CHECK-LABEL: gws_sema_br_offset0:
|
|
; CHECK: ; %bb.0:
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ;;#ASMSTART
|
|
; CHECK-NEXT: ; def a0
|
|
; CHECK-NEXT: ;;#ASMEND
|
|
; CHECK-NEXT: s_mov_b32 m0, 0
|
|
; CHECK-NEXT: s_nop 0
|
|
; CHECK-NEXT: ds_gws_sema_br a0 gds
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.sema.br(i32 %val, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.sema.p with a constant offset of 0. The intrinsic takes only the
; offset, so the AGPR-defined %val is never used. The two selectors differ:
; the expected SelectionDAG output contains no inline asm block, while the
; expected GlobalISel output still emits it after the ds_gws_sema_p instruction.
; NOTE(review): the unused inline asm looks like a carry-over from the tests
; that take a data operand - confirm whether it is intentional.
define void @gws_sema_p_offset0() #0 {
|
|
; SDAG-LABEL: gws_sema_p_offset0:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 m0, 0
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: ds_gws_sema_p gds
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: gws_sema_p_offset0:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 m0, 0
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: ds_gws_sema_p gds
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: ;;#ASMSTART
|
|
; GISEL-NEXT: ; def a0
|
|
; GISEL-NEXT: ;;#ASMEND
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.sema.p(i32 0)
|
|
ret void
|
|
}
|
|
|
|
; ds.gws.sema.release.all with a constant offset of 0. The intrinsic takes only
; the offset, so the AGPR-defined %val is never used. The two selectors differ:
; the expected SelectionDAG output contains no inline asm block, while the
; expected GlobalISel output still emits it after the ds_gws_sema_release_all
; instruction.
; NOTE(review): the unused inline asm looks like a carry-over from the tests
; that take a data operand - confirm whether it is intentional.
define void @gws_sema_release_all_offset0() #0 {
|
|
; SDAG-LABEL: gws_sema_release_all_offset0:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 m0, 0
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: ds_gws_sema_release_all gds
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: gws_sema_release_all_offset0:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 m0, 0
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: ds_gws_sema_release_all gds
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: ;;#ASMSTART
|
|
; GISEL-NEXT: ; def a0
|
|
; GISEL-NEXT: ;;#ASMEND
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call i32 asm "; def $0", "=a"()
|
|
call void @llvm.amdgcn.ds.gws.sema.release.all(i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by the test functions above: nounwind only.
attributes #0 = { nounwind }
|