[AMDGPU] Add the support for .cluster_dims code object metadata (#158721)
Co-authored-by: Ivan Kosarev <ivan.kosarev@amd.com>
This commit is contained in:
parent
b5afe416c7
commit
04cd39ae28
@ -4839,7 +4839,24 @@ Code object V5 metadata is the same as
|
||||
|
||||
====================== ============== ========= ================================
|
||||
|
||||
..
|
||||
.. _amdgpu-amdhsa-code-object-metadata-v6:
|
||||
|
||||
Code Object V6 Metadata
|
||||
+++++++++++++++++++++++
|
||||
|
||||
Code object V6 metadata is the same as
|
||||
:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
|
||||
:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.
|
||||
|
||||
.. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
|
||||
:name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6
|
||||
|
||||
============================= ============= ========== =======================================
|
||||
String Key Value Type Required? Description
|
||||
============================= ============= ========== =======================================
|
||||
".cluster_dims" sequence of The dimension of the cluster.
|
||||
3 integers
|
||||
============================= ============= ========== =======================================
|
||||
|
||||
Kernel Dispatch
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
@ -281,7 +281,14 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
|
||||
return false;
|
||||
|
||||
if (!verifyEntry(
|
||||
KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
|
||||
return verifyArray(
|
||||
Node,
|
||||
[this](msgpack::DocNode &Node) { return verifyInteger(Node); },
|
||||
3);
|
||||
}))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -254,9 +254,9 @@ void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
|
||||
}
|
||||
|
||||
void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
|
||||
const Function &Func,
|
||||
const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) {
|
||||
|
||||
const Function &Func = MF.getFunction();
|
||||
if (auto *Node = Func.getMetadata("reqd_work_group_size"))
|
||||
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
|
||||
if (auto *Node = Func.getMetadata("work_group_size_hint"))
|
||||
@ -599,7 +599,7 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
|
||||
Kern[".symbol"] = Kern.getDocument()->getNode(
|
||||
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
|
||||
emitKernelLanguage(Func, Kern);
|
||||
emitKernelAttrs(TM, Func, Kern);
|
||||
emitKernelAttrs(TM, MF, Kern);
|
||||
emitKernelArgs(MF, Kern);
|
||||
}
|
||||
|
||||
@ -726,10 +726,11 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
|
||||
}
|
||||
|
||||
void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
|
||||
const Function &Func,
|
||||
const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) {
|
||||
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
|
||||
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
|
||||
|
||||
const Function &Func = MF.getFunction();
|
||||
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
|
||||
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
|
||||
}
|
||||
@ -745,5 +746,21 @@ void MetadataStreamerMsgPackV6::emitVersion() {
|
||||
getRootMetadata("amdhsa.version") = Version;
|
||||
}
|
||||
|
||||
void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
|
||||
const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) {
|
||||
MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
|
||||
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
ClusterDimsAttr Attr = MFI.getClusterDims();
|
||||
if (Attr.isFixedDims()) {
|
||||
msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
|
||||
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
|
||||
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
|
||||
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
|
||||
Kern[".cluster_dims"] = ClusterDimsNode;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace AMDGPU::HSAMD
|
||||
} // end namespace llvm
|
||||
|
||||
@ -61,7 +61,7 @@ protected:
|
||||
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
|
||||
msgpack::ArrayDocNode Args) = 0;
|
||||
virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
|
||||
const Function &Func,
|
||||
const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) = 0;
|
||||
};
|
||||
|
||||
@ -102,7 +102,7 @@ protected:
|
||||
|
||||
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
|
||||
|
||||
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
|
||||
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) override;
|
||||
|
||||
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
|
||||
@ -149,7 +149,7 @@ protected:
|
||||
void emitVersion() override;
|
||||
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
|
||||
msgpack::ArrayDocNode Args) override;
|
||||
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
|
||||
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) override;
|
||||
|
||||
public:
|
||||
@ -164,6 +164,9 @@ protected:
|
||||
public:
|
||||
MetadataStreamerMsgPackV6() = default;
|
||||
~MetadataStreamerMsgPackV6() = default;
|
||||
|
||||
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
|
||||
msgpack::MapDocNode Kern) override;
|
||||
};
|
||||
|
||||
} // end namespace HSAMD
|
||||
|
||||
47
llvm/test/CodeGen/AMDGPU/cluster-dims.ll
Normal file
47
llvm/test/CodeGen/AMDGPU/cluster-dims.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s
|
||||
|
||||
; CHECK: .cluster_dims:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 2
|
||||
define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .cluster_dims:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 1
|
||||
define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .cluster_dims:
|
||||
; CHECK-NEXT: - 4
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 1
|
||||
define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .cluster_dims:
|
||||
; CHECK-NEXT: - 4
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 1
|
||||
define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
|
||||
attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
|
||||
attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
|
||||
attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
35
llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
Normal file
35
llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
Normal file
@ -0,0 +1,35 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: .amdgpu_metadata
|
||||
// CHECK: amdhsa.kernels:
|
||||
// CHECK: - .cluster_dims:
|
||||
// CHECK-NEXT: - 4
|
||||
// CHECK-NEXT: - 2
|
||||
// CHECK-NEXT: - 1
|
||||
.amdgpu_metadata
|
||||
amdhsa.version:
|
||||
- 1
|
||||
- 0
|
||||
amdhsa.printf:
|
||||
- '1:1:4:%d\n'
|
||||
- '2:1:8:%g\n'
|
||||
amdhsa.kernels:
|
||||
- .name: test_kernel
|
||||
.symbol: test_kernel@kd
|
||||
.language: OpenCL C
|
||||
.language_version:
|
||||
- 2
|
||||
- 0
|
||||
.kernarg_segment_size: 8
|
||||
.group_segment_fixed_size: 16
|
||||
.private_segment_fixed_size: 32
|
||||
.kernarg_segment_align: 64
|
||||
.wavefront_size: 128
|
||||
.sgpr_count: 14
|
||||
.vgpr_count: 40
|
||||
.max_flat_workgroup_size: 256
|
||||
.cluster_dims:
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
.end_amdgpu_metadata
|
||||
Loading…
x
Reference in New Issue
Block a user