
The previous implementation wasn't maintaining a faithful IR representation of how this really works. The value returned by createEnqueuedBlockKernel wasn't actually used as a function, and was hacked up later to be a pointer to the runtime handle global variable. In reality, the enqueued block is a struct where the first field is a pointer to the kernel descriptor, not the kernel itself. We were also relying on passing around a reference to a global using a string attribute containing its name. It's better to base this on a proper IR symbol reference during final emission. This now avoids using a function attribute on kernels and avoids using the additional "runtime-handle" attribute to populate the final metadata. Instead, associate the runtime handle reference with the kernel using the !associated global metadata. We can then get a final, correctly mangled name at the end. I couldn't figure out how to get rename-with-external-symbol behavior using a combination of comdats and aliases, so this leaves an IR pass to externalize the runtime handles for codegen. If anything breaks, it's most likely this, so avoiding it is left for a later step. Use a special section name to enable this behavior. This also means it's possible to declare enqueuable kernels in source without going through the dedicated block syntax or other dedicated compiler support. We could move towards initializing the runtime handle in the compiler/linker. I have a working patch where the linker sets up the first field of the handle, avoiding the need to export the block kernel symbol for the runtime. We would need new relocations to get the private and group sizes, but that would avoid the runtime's special case handling that requires the device_enqueue_symbol metadata field. https://reviews.llvm.org/D141700
63 lines
2.3 KiB
LLVM
63 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-export-kernel-runtime-handles < %s | FileCheck %s
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-export-kernel-runtime-handles < %s | FileCheck %s
|
|
|
|
; Handle type shared by both globals below: one addrspace(1) pointer followed
; by two i32 fields.
%block.runtime.handle.t = type { ptr addrspace(1), i32, i32 }
|
|
|
|
; Two internal handle globals that differ only in their section:
;  - @block.handle is placed in section ".amdgpu.kernel.runtime.handle"; the
;    expected output drops its `internal` linkage.
;  - @not.a.block.handle has no section; associated globals without the
;    correct section should be ignored, so it is expected to stay `internal`.
|
|
@block.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t zeroinitializer, section ".amdgpu.kernel.runtime.handle"
|
|
@not.a.block.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t zeroinitializer
|
|
|
|
;.
|
|
; CHECK: @block.handle = addrspace(1) externally_initialized constant %block.runtime.handle.t zeroinitializer, section ".amdgpu.kernel.runtime.handle"
|
|
; CHECK: @not.a.block.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t zeroinitializer
|
|
;.
|
|
; Positive case: an internal kernel whose !associated operand (!0) is
; @block.handle, the global in section ".amdgpu.kernel.runtime.handle".
; The expected output below has the definition changed from `internal` to
; `protected`, with the !associated attachment kept.
define internal amdgpu_kernel void @block_kernel() !associated !0 {
|
|
; CHECK-LABEL: define protected amdgpu_kernel void @block_kernel(
|
|
; CHECK-SAME: ) !associated [[META0:![0-9]+]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Same as @block_kernel (shares !associated node !0, i.e. @block.handle), but
; the input definition additionally carries an explicit `dso_local`.
; The expected output is again `define protected amdgpu_kernel`; no explicit
; `dso_local` appears in the expected definition line.
define internal dso_local amdgpu_kernel void @dso_local_block_kernel() !associated !0 {
|
|
; CHECK-LABEL: define protected amdgpu_kernel void @dso_local_block_kernel(
|
|
; CHECK-SAME: ) !associated [[META0]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Negative case: the !associated operand (!1) is @not.a.block.handle, which is
; not in the ".amdgpu.kernel.runtime.handle" section. The expected output
; leaves the kernel `internal`.
define internal amdgpu_kernel void @not_block_kernel() !associated !1 {
|
|
; CHECK-LABEL: define internal amdgpu_kernel void @not_block_kernel(
|
|
; CHECK-SAME: ) !associated [[META1:![0-9]+]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Negative case: the !associated operand (!2) is a null addrspace(1) pointer
; rather than a global. The expected output leaves the kernel `internal`.
define internal amdgpu_kernel void @associated_null() !associated !2 {
|
|
; CHECK-LABEL: define internal amdgpu_kernel void @associated_null(
|
|
; CHECK-SAME: ) !associated [[META2:![0-9]+]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; Negative case: an internal kernel with no !associated attachment at all.
; The expected output is identical to the input definition.
define internal amdgpu_kernel void @no_metadata() {
|
|
; CHECK-LABEL: define internal amdgpu_kernel void @no_metadata() {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; !0: references @block.handle (the global with the runtime-handle section);
; attached to @block_kernel and @dso_local_block_kernel.
!0 = !{ptr addrspace(1) @block.handle }
|
|
; !1: references @not.a.block.handle (no section); attached to
; @not_block_kernel.
!1 = !{ptr addrspace(1) @not.a.block.handle }
|
|
; !2: null addrspace(1) pointer operand; attached to @associated_null.
!2 = !{ptr addrspace(1) null }
|
|
|
|
;.
|
|
; CHECK: [[META0]] = !{ptr addrspace(1) @block.handle}
|
|
; CHECK: [[META1]] = !{ptr addrspace(1) @not.a.block.handle}
|
|
; CHECK: [[META2]] = !{ptr addrspace(1) null}
|
|
;.
|