
Whole wave functions are functions that will run with a full EXEC mask. They will not be invoked directly, but instead will be launched by way of a new intrinsic, `llvm.amdgcn.call.whole.wave` (to be added in a future patch). These functions are meant as an alternative to the `llvm.amdgcn.init.whole.wave` or `llvm.amdgcn.strict.wwm` intrinsics.

Whole wave functions set EXEC to -1 in the prologue and restore the original value of EXEC in the epilogue. They must have a special first argument, `i1 %active`, that is mapped to EXEC. They may have either the default calling convention or amdgpu_gfx. The inactive lanes of all registers used must be preserved; the active lanes need to be preserved only for the CSRs. At the IR level, arguments to a whole wave function (other than `%active`) contain poison in their inactive lanes. Likewise, the return value is poison in the inactive lanes.

This patch contains the following work:

* 2 new pseudos, SI_SETUP_WHOLE_WAVE_FUNC and SI_WHOLE_WAVE_FUNC_RETURN, used for managing the EXEC mask. SI_SETUP_WHOLE_WAVE_FUNC will return a SReg_1 representing `%active`, which needs to be passed into SI_WHOLE_WAVE_FUNC_RETURN.
* SelectionDAG support for generating these 2 new pseudos and the special handling of `%active`. Since the return may be in a different basic block, it's difficult to add the virtual reg for `%active` to SI_WHOLE_WAVE_FUNC_RETURN, so we initially generate an IMPLICIT_DEF, which is later replaced via a custom inserter.
* Expansion of the 2 pseudos during prolog/epilog insertion. PEI also marks any used VGPRs as WWM registers, which are then spilled and restored with the usual logic.

Future patches will add the `llvm.amdgcn.call.whole.wave` intrinsic and a lot of optimization work (especially in order to reduce spills around function calls). An illustrative IR sketch follows below.

---------

Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
Co-authored-by: Shilei Tian <i@tianshilei.me>
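As a quick illustration (a sketch only; the function and value names below are hypothetical and not part of the test that follows), a whole wave function at the IR level is an ordinary function whose first parameter is `i1 %active`, with poison in the inactive lanes of the remaining arguments:

```llvm
; Hypothetical whole wave function: EXEC is forced to -1 on entry and
; restored on return by the SI_SETUP_WHOLE_WAVE_FUNC /
; SI_WHOLE_WAVE_FUNC_RETURN pseudos; %active reports which lanes were
; active at the call site.
define amdgpu_gfx_whole_wave i32 @example_wwf(i1 %active, i32 %x, i32 %y) {
  ; Inactive lanes of %x and %y contain poison, so the result is only
  ; meaningful where %active is true; inactive lanes of the return value
  ; are poison as well.
  %sum = add i32 %x, %y
  %result = select i1 %active, i32 %sum, i32 0
  ret i32 %result
}
```

Calling such a function directly is rejected by the verifier (see the last test case below); it is only reachable through the future `llvm.amdgcn.call.whole.wave` intrinsic.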
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
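; Verify that the IR verifier rejects signatures and argument attributes that
; are invalid for the AMDGPU and SPIR calling conventions.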

target datalayout = "A5"

; CHECK: Calling convention requires void return type
; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_kernel
define amdgpu_kernel i32 @nonvoid_cc_amdgpu_kernel() {
  ret i32 0
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_kernel
define amdgpu_kernel void @varargs_amdgpu_kernel(...) {
  ret void
}

; CHECK: Calling convention does not allow sret
; CHECK-NEXT: ptr @sret_cc_amdgpu_kernel_as0
define amdgpu_kernel void @sret_cc_amdgpu_kernel_as0(ptr sret(i32) %ptr) {
  ret void
}

; CHECK: Calling convention does not allow sret
; CHECK-NEXT: ptr @sret_cc_amdgpu_kernel
define amdgpu_kernel void @sret_cc_amdgpu_kernel(ptr addrspace(5) sret(i32) %ptr) {
  ret void
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_vs
define amdgpu_vs void @varargs_amdgpu_vs(...) {
  ret void
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_gs
define amdgpu_gs void @varargs_amdgpu_gs(...) {
  ret void
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_ps
define amdgpu_ps void @varargs_amdgpu_ps(...) {
  ret void
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_cs
define amdgpu_cs void @varargs_amdgpu_cs(...) {
  ret void
}

; CHECK: Calling convention requires void return type
; CHECK-NEXT: ptr @nonvoid_cc_spir_kernel
define spir_kernel i32 @nonvoid_cc_spir_kernel() {
  ret i32 0
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_spir_kernel
define spir_kernel void @varargs_spir_kernel(...) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_kernel
define amdgpu_kernel void @byval_cc_amdgpu_kernel(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_as1_cc_amdgpu_kernel
define amdgpu_kernel void @byval_as1_cc_amdgpu_kernel(ptr addrspace(1) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_as0_cc_amdgpu_kernel
define amdgpu_kernel void @byval_as0_cc_amdgpu_kernel(ptr byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_vs
define amdgpu_vs void @byval_cc_amdgpu_vs(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_hs
define amdgpu_hs void @byval_cc_amdgpu_hs(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_gs
define amdgpu_gs void @byval_cc_amdgpu_gs(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_ps
define amdgpu_ps void @byval_cc_amdgpu_ps(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_cs
define amdgpu_cs void @byval_cc_amdgpu_cs(ptr addrspace(5) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows preallocated
; CHECK-NEXT: ptr @preallocated_as0_cc_amdgpu_kernel
define amdgpu_kernel void @preallocated_as0_cc_amdgpu_kernel(ptr preallocated(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows inalloca
; CHECK-NEXT: ptr @inalloca_as0_cc_amdgpu_kernel
define amdgpu_kernel void @inalloca_as0_cc_amdgpu_kernel(ptr inalloca(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows stack byref
; CHECK-NEXT: ptr @byref_as5_cc_amdgpu_kernel
define amdgpu_kernel void @byref_as5_cc_amdgpu_kernel(ptr addrspace(5) byref(i32) %ptr) {
  ret void
}

; CHECK: Calling convention requires void return type
; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain
define amdgpu_cs_chain i32 @nonvoid_cc_amdgpu_cs_chain() {
  ret i32 0
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain
define amdgpu_cs_chain void @varargs_amdgpu_cs_chain(...) {
  ret void
}

; CHECK: Calling convention does not allow sret
; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_as0
define amdgpu_cs_chain void @sret_cc_amdgpu_cs_chain_as0(ptr sret(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain
define amdgpu_cs_chain void @byval_cc_amdgpu_cs_chain(ptr addrspace(1) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows stack byref
; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain
define amdgpu_cs_chain void @byref_cc_amdgpu_cs_chain(ptr addrspace(5) byref(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows preallocated
; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain
define amdgpu_cs_chain void @preallocated_cc_amdgpu_cs_chain(ptr preallocated(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows inalloca
; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain
define amdgpu_cs_chain void @inalloca_cc_amdgpu_cs_chain(ptr inalloca(i32) %ptr) {
  ret void
}

; CHECK: Calling convention requires void return type
; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve i32 @nonvoid_cc_amdgpu_cs_chain_preserve() {
  ret i32 0
}

; CHECK: Calling convention does not support varargs or perfect forwarding!
; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @varargs_amdgpu_cs_chain_preserve(...) {
  ret void
}

; CHECK: Calling convention does not allow sret
; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve_as0
define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve_as0(ptr sret(i32) %ptr) {
  ret void
}

; CHECK: Calling convention does not allow sret
; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) sret(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows byval
; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @byval_cc_amdgpu_cs_chain_preserve(ptr addrspace(1) byval(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows stack byref
; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @byref_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) byref(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows preallocated
; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @preallocated_cc_amdgpu_cs_chain_preserve(ptr preallocated(i32) %ptr) {
  ret void
}

; CHECK: Calling convention disallows inalloca
; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain_preserve
define amdgpu_cs_chain_preserve void @inalloca_cc_amdgpu_cs_chain_preserve(ptr inalloca(i32) %ptr) {
  ret void
}

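; The amdgpu_gfx_whole_wave calling convention requires a leading i1 %active
; argument (not inreg), disallows varargs, and may not be used for direct calls.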
; CHECK: Calling convention requires first argument to be i1
; CHECK-NEXT: ptr @whole_wave_no_args
define amdgpu_gfx_whole_wave void @whole_wave_no_args() {
  ret void
}

; CHECK: Calling convention requires first argument to be i1
; CHECK-NEXT: ptr @whole_wave_must_have_i1_active
define amdgpu_gfx_whole_wave void @whole_wave_must_have_i1_active(i32 %x) {
  ret void
}

; CHECK: Calling convention requires first argument to not be inreg
; CHECK-NEXT: ptr @whole_wave_i1_active_inreg
define amdgpu_gfx_whole_wave void @whole_wave_i1_active_inreg(i1 inreg %active) {
  ret void
}

; CHECK: Calling convention does not support varargs
; CHECK-NEXT: ptr @whole_wave_varargs
define amdgpu_gfx_whole_wave void @whole_wave_varargs(i1 %active, i32 %x, ...) {
  ret void
}

declare amdgpu_gfx_whole_wave void @whole_wave_callee(i1 %active)

; CHECK: calling convention does not permit calls
; CHECK-NEXT: call amdgpu_gfx_whole_wave void @whole_wave_callee(i1 true)
define amdgpu_cs void @cant_call_whole_wave_func() {
  call amdgpu_gfx_whole_wave void @whole_wave_callee(i1 true)
  ret void
}