
This patch adds the `#gpu.kernel_metadata` and `#gpu.kernel_table` attributes. The `#gpu.kernel_metadata` attribute allows storing metadata related to a compiled kernel, for example, the number of scalar registers used by the kernel. The attribute has only 2 required parameters, the name and the function type. It also has 2 optional parameters, the argument attributes and a generic dictionary for storing all other metadata. The `#gpu.kernel_table` attribute stores a table of `#gpu.kernel_metadata`, mapping the name of the kernel to the metadata. Finally, the function `ROCDL::getAMDHSAKernelsELFMetadata` was added to collect ELF metadata from a binary, and to test the class methods in both attributes. Example:
```mlir
gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">, kernels = #gpu.kernel_table<[ #gpu.kernel_metadata<"kernel0", (i32) -> (), metadata = {sgpr_count = 255}>, #gpu.kernel_metadata<"kernel1", (i32, f32) -> (), arg_attrs = [{llvm.read_only}, {}]> ]> , bin = "BLOB">]
```
The motivation behind these attributes is to provide useful information for things like tuning. --------- Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
67 lines
2.1 KiB
Python
67 lines
2.1 KiB
Python
# RUN: %PYTHON %s | FileCheck %s
|
|
|
|
from mlir.ir import *
|
|
import mlir.dialects.gpu as gpu
|
|
import mlir.dialects.gpu.passes
|
|
from mlir.passmanager import *
|
|
|
|
|
|
def run(f):
    """Run the test function `f` immediately and return it unchanged.

    A banner line of the form ``TEST: <function name>`` is printed first
    (preceded by a newline), then `f` is called with a newly constructed
    `Context` and `Location.unknown()` both entered as context managers.
    Because `f` itself is returned, this can be applied as a decorator
    without replacing the decorated name.
    """
    test_name = f.__name__
    print("\nTEST:", test_name)
    with Context():
        # The location is created only after the context has been entered,
        # matching the left-to-right order of a combined `with` statement.
        with Location.unknown():
            f()
    return f
|
|
|
|
|
|
# CHECK-LABEL: testGPUPass
|
|
# CHECK: SUCCESS
|
|
@run
def testGPUPass():
    """Parse a pipeline containing `gpu-kernel-outlining`, then print SUCCESS.

    The success marker is only reached if `PassManager.parse` returns
    without raising.
    """
    pipeline = "any(gpu-kernel-outlining)"
    PassManager.parse(pipeline)
    print("SUCCESS")
|
|
|
|
|
|
# CHECK-LABEL: testMMAElementWiseAttr
|
|
@run
def testMMAElementWiseAttr():
    """Create a `gpu.BlockDimOp` for dimension `y` in a new module and print it.

    NOTE(review): the function name mentions an MMA element-wise attribute,
    but the body only builds a block-dimension op; confirm the name is
    intentional before renaming anything, since the expected-output label
    above this function refers to it.
    """
    mod = Module.create()
    body_ip = InsertionPoint(mod.body)
    with body_ip:
        gpu.BlockDimOp(gpu.Dimension.y)
    # CHECK: %block_dim_y = gpu.block_dim y
    print(mod)
|
|
|
|
|
|
# CHECK-LABEL: testObjectAttr
|
|
@run
def testObjectAttr():
    """Exercise `gpu.ObjectAttr.get` and the `object` / `kernels` accessors.

    Four object attributes are built against an `#nvvm.target` with the
    `Fatbin` compilation target, and each one is printed so its textual
    form can be matched by the expected-output comments:

    1. a bytes payload with a properties dictionary,
    2. the same bytes payload with no properties,
    3. a textual bytes payload containing newlines,
    4. the first bytes payload with a kernel table passed via `kernels=`.

    Cases 1 and 3 also assert that `.object` returns the bytes passed in,
    and case 4 asserts that `.kernels` equals the parsed kernel table.
    """
    target = Attribute.parse("#nvvm.target")
    fmt = gpu.CompilationTarget.Fatbin
    binary_blob = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"

    # Case 1: payload plus an explicit properties dictionary.
    i32 = IntegerType.get_signless(32)
    props = DictAttr.get({"O": IntegerAttr.get(i32, 2)})
    o = gpu.ObjectAttr.get(target, fmt, binary_blob, props)
    # CHECK: #gpu.object<#nvvm.target, properties = {O = 2 : i32}, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.object == binary_blob

    # Case 2: same payload, properties omitted.
    o = gpu.ObjectAttr.get(target, fmt, binary_blob)
    # CHECK: #gpu.object<#nvvm.target, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)

    # Case 3: payload made of printable text with embedded newlines.
    text_blob = (
        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
    )
    o = gpu.ObjectAttr.get(target, fmt, text_blob)
    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
    print(o)
    assert o.object == text_blob

    # Case 4: first payload again, this time with a kernel table attached.
    kernel_table = Attribute.parse(
        '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
    )
    o = gpu.ObjectAttr.get(target, fmt, binary_blob, kernels=kernel_table)
    # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.kernels == kernel_table
|