[AMDGPU] Add the support for .cluster_dims code object metadata (#158721)

Co-authored-by: Ivan Kosarev <ivan.kosarev@amd.com>
This commit is contained in:
Shilei Tian 2025-09-15 16:13:07 -04:00 committed by GitHub
parent b5afe416c7
commit 04cd39ae28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 136 additions and 10 deletions

View File

@ -4839,7 +4839,24 @@ Code object V5 metadata is the same as
====================== ============== ========= ================================
..
.. _amdgpu-amdhsa-code-object-metadata-v6:
Code Object V6 Metadata
+++++++++++++++++++++++
Code object V6 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.
.. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
:name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6
============================= ============= ========== =======================================
String Key Value Type Required? Description
============================= ============= ========== =======================================
".cluster_dims" sequence of The dimension of the cluster.
3 integers
============================= ============= ========== =======================================
Kernel Dispatch
~~~~~~~~~~~~~~~

View File

@ -281,7 +281,14 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
return false;
if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
return false;
if (!verifyEntry(
KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
return verifyArray(
Node,
[this](msgpack::DocNode &Node) { return verifyInteger(Node); },
3);
}))
return false;
return true;
}

View File

@ -254,9 +254,9 @@ void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
}
void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {
const Function &Func = MF.getFunction();
if (auto *Node = Func.getMetadata("reqd_work_group_size"))
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
if (auto *Node = Func.getMetadata("work_group_size_hint"))
@ -599,7 +599,7 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
Kern[".symbol"] = Kern.getDocument()->getNode(
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(TM, Func, Kern);
emitKernelAttrs(TM, MF, Kern);
emitKernelArgs(MF, Kern);
}
@ -726,10 +726,11 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
}
void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
const Function &Func = MF.getFunction();
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}
@ -745,5 +746,21 @@ void MetadataStreamerMsgPackV6::emitVersion() {
getRootMetadata("amdhsa.version") = Version;
}
void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {
MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
ClusterDimsAttr Attr = MFI.getClusterDims();
if (Attr.isFixedDims()) {
msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
Kern[".cluster_dims"] = ClusterDimsNode;
}
}
} // end namespace AMDGPU::HSAMD
} // end namespace llvm

View File

@ -61,7 +61,7 @@ protected:
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) = 0;
virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) = 0;
};
@ -102,7 +102,7 @@ protected:
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
@ -149,7 +149,7 @@ protected:
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override;
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
public:
@ -164,6 +164,9 @@ protected:
public:
MetadataStreamerMsgPackV6() = default;
~MetadataStreamerMsgPackV6() = default;
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
};
} // end namespace HSAMD

View File

@ -0,0 +1,47 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s
; CHECK: .cluster_dims:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
entry:
ret void
}
; CHECK: .cluster_dims:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
entry:
ret void
}
; CHECK: .cluster_dims:
; CHECK-NEXT: - 4
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
entry:
ret void
}
; CHECK: .cluster_dims:
; CHECK-NEXT: - 4
; CHECK-NEXT: - 2
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
entry:
ret void
}
attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}

View File

@ -0,0 +1,35 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s
// CHECK: .amdgpu_metadata
// CHECK: amdhsa.kernels:
// CHECK: - .cluster_dims:
// CHECK-NEXT: - 4
// CHECK-NEXT: - 2
// CHECK-NEXT: - 1
.amdgpu_metadata
amdhsa.version:
- 1
- 0
amdhsa.printf:
- '1:1:4:%d\n'
- '2:1:8:%g\n'
amdhsa.kernels:
- .name: test_kernel
.symbol: test_kernel@kd
.language: OpenCL C
.language_version:
- 2
- 0
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
.cluster_dims:
- 4
- 2
- 1
.end_amdgpu_metadata