
In a nutshell, this moves our libomptarget code to populate the offload subproject.

With this commit, users need to enable the new LLVM/Offload subproject as a runtime in their CMake configuration. No further changes are expected for downstream code.

Tests and other components still depend on OpenMP and have also not been renamed. The results below are for a build in which OpenMP and Offload are enabled runtimes. In addition to the pure `git mv`, we needed to adjust some CMake files. Nothing is intended to change semantics.

```
ninja check-offload
```
Works with the X86 and AMDGPU offload tests.

```
ninja check-openmp
```
Still works but doesn't build offload tests anymore.

```
ls install/lib
```
Shows all expected libraries, incl.
- `libomptarget.devicertl.a`
- `libomptarget-nvptx-sm_90.bc`
- `libomptarget.rtl.amdgpu.so` -> `libomptarget.rtl.amdgpu.so.18git`
- `libomptarget.so` -> `libomptarget.so.18git`

Fixes: https://github.com/llvm/llvm-project/issues/75124

---------

Co-authored-by: Saiyedul Islam <Saiyedul.Islam@amd.com>
318 lines
10 KiB
C++
318 lines
10 KiB
C++
//===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// RTL Utilities for AMDGPU plugins
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include <cstdint>
|
|
|
|
#include "Shared/Debug.h"
|
|
#include "Utils/ELF.h"
|
|
|
|
#include "omptarget.h"
|
|
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/Error.h"
|
|
|
|
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
|
#include "llvm/BinaryFormat/ELF.h"
|
|
#include "llvm/BinaryFormat/MsgPackDocument.h"
|
|
#include "llvm/Support/MemoryBufferRef.h"
|
|
#include "llvm/Support/YAMLTraits.h"
|
|
|
|
using namespace llvm::ELF;
|
|
|
|
namespace llvm {
|
|
namespace omp {
|
|
namespace target {
|
|
namespace plugin {
|
|
namespace utils {
|
|
|
|
/// The implicit arguments of COV5 AMDGPU kernels.
///
/// Only the fields named below are declared explicitly. The UnusedN arrays are
/// padding whose lengths place the named fields at fixed byte offsets and
/// bring the whole struct to 256 bytes.
struct AMDGPUImplicitArgsTy {
  uint32_t BlockCountX;
  uint32_t BlockCountY;
  uint32_t BlockCountZ;
  uint16_t GroupSizeX;
  uint16_t GroupSizeY;
  uint16_t GroupSizeZ;
  uint8_t Unused0[46]; // 46 bytes of padding; GridDims follows at offset 64.
  uint16_t GridDims;
  uint8_t Unused1[54]; // 54 bytes of padding; DynamicLdsSize is at offset 120.
  uint32_t DynamicLdsSize;
  uint8_t Unused2[132]; // 132 bytes of padding up to the total size of 256.
};

// The padding lengths above are only meaningful if the overall size is stable.
static_assert(sizeof(AMDGPUImplicitArgsTy) == 256,
              "Unexpected size of the COV5 implicit arguments");
|
|
|
|
/// Dummy struct for COV4 implicitargs. It declares no individual fields; only
/// its size (56 bytes) is used, via getImplicitArgsSize().
struct AMDGPUImplicitArgsTyCOV4 {
  uint8_t Unused[56];
};

// The struct exists solely to carry this size, so pin it at compile time.
static_assert(sizeof(AMDGPUImplicitArgsTyCOV4) == 56,
              "Unexpected size of the COV4 implicit arguments");
|
|
|
|
inline uint32_t getImplicitArgsSize(uint16_t Version) {
|
|
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
|
|
? sizeof(AMDGPUImplicitArgsTyCOV4)
|
|
: sizeof(AMDGPUImplicitArgsTy);
|
|
}
|
|
|
|
/// Check if an image is compatible with current system's environment. The
|
|
/// system environment is given as a 'target-id' which has the form:
|
|
///
|
|
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
|
|
///
|
|
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
|
|
/// and is compatible with either '+' or '-'. The HSA runtime returns this
|
|
/// information using the target-id, while we use the ELF header to determine
|
|
/// these features.
|
|
inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
|
|
StringRef EnvTargetID) {
|
|
StringRef EnvArch = EnvTargetID.split(":").first;
|
|
|
|
// Trivial check if the base processors match.
|
|
if (EnvArch != ImageArch)
|
|
return false;
|
|
|
|
// Check if the image is requesting xnack on or off.
|
|
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
|
|
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
|
|
// The image is 'xnack-' so the environment must be 'xnack-'.
|
|
if (!EnvTargetID.contains("xnack-"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
|
|
// The image is 'xnack+' so the environment must be 'xnack+'.
|
|
if (!EnvTargetID.contains("xnack+"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
|
|
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// Check if the image is requesting sramecc on or off.
|
|
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
|
|
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
|
|
// The image is 'sramecc-' so the environment must be 'sramecc-'.
|
|
if (!EnvTargetID.contains("sramecc-"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
|
|
// The image is 'sramecc+' so the environment must be 'sramecc+'.
|
|
if (!EnvTargetID.contains("sramecc+"))
|
|
return false;
|
|
break;
|
|
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
|
|
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Per-kernel information collected from the AMDGPU metadata of an image.
///
/// Every member defaults to zero so that a field whose key is absent from the
/// metadata has a well-defined value instead of an indeterminate one. The
/// comments name the metadata key each field is filled from by
/// KernelInfoReader; fields without such a comment are not written by the
/// reader in this file.
struct KernelMetaDataTy {
  uint64_t KernelObject = 0;
  uint32_t GroupSegmentList = 0;   // ".group_segment_fixed_size"
  uint32_t PrivateSegmentSize = 0; // ".private_segment_fixed_size"
  uint32_t SGPRCount = 0;          // ".sgpr_count"
  uint32_t VGPRCount = 0;          // ".vgpr_count"
  uint32_t SGPRSpillCount = 0;     // ".sgpr_spill_count"
  uint32_t VGPRSpillCount = 0;     // ".vgpr_spill_count"
  uint32_t KernelSegmentSize = 0;
  uint32_t ExplicitArgumentCount = 0;
  uint32_t ImplicitArgumentCount = 0;
  uint32_t RequestedWorkgroupSize[3] = {0, 0, 0}; // ".reqd_workgroup_size"
  uint32_t WorkgroupSizeHint[3] = {0, 0, 0};      // ".workgroup_size_hint"
  uint32_t WavefronSize = 0;                      // ".wavefront_size"
  uint32_t MaxFlatWorkgroupSize = 0;              // ".max_flat_workgroup_size"
};
|
|
namespace {
|
|
|
|
/// Reads the AMDGPU specific per-kernel-metadata from an image.
|
|
class KernelInfoReader {
|
|
public:
|
|
KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
|
|
|
|
/// Process ELF note to read AMDGPU metadata from respective information
|
|
/// fields.
|
|
Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
|
|
if (Note.getName() != "AMDGPU")
|
|
return Error::success(); // We are not interested in other things
|
|
|
|
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
|
|
"Parse AMDGPU MetaData");
|
|
auto Desc = Note.getDesc(Align);
|
|
StringRef MsgPackString =
|
|
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
|
msgpack::Document MsgPackDoc;
|
|
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
|
|
return Error::success();
|
|
|
|
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
|
if (!Verifier.verify(MsgPackDoc.getRoot()))
|
|
return Error::success();
|
|
|
|
auto RootMap = MsgPackDoc.getRoot().getMap(true);
|
|
|
|
if (auto Err = iterateAMDKernels(RootMap))
|
|
return Err;
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
private:
|
|
/// Extracts the relevant information via simple string look-up in the msgpack
|
|
/// document elements.
|
|
Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
|
|
std::string &KernelName,
|
|
KernelMetaDataTy &KernelData) {
|
|
if (!V.first.isString())
|
|
return Error::success();
|
|
|
|
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
|
|
return DK.getString() == SK;
|
|
};
|
|
|
|
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
|
uint32_t *Vals) {
|
|
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
|
auto DNA = DN.getArray();
|
|
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
|
|
|
int I = 0;
|
|
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
|
++DNABegin) {
|
|
Vals[I++] = DNABegin->getUInt();
|
|
}
|
|
};
|
|
|
|
if (IsKey(V.first, ".name")) {
|
|
KernelName = V.second.toString();
|
|
} else if (IsKey(V.first, ".sgpr_count")) {
|
|
KernelData.SGPRCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".sgpr_spill_count")) {
|
|
KernelData.SGPRSpillCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".vgpr_count")) {
|
|
KernelData.VGPRCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".vgpr_spill_count")) {
|
|
KernelData.VGPRSpillCount = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
|
|
KernelData.PrivateSegmentSize = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
|
|
KernelData.GroupSegmentList = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
|
|
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
|
} else if (IsKey(V.first, ".workgroup_size_hint")) {
|
|
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
|
} else if (IsKey(V.first, ".wavefront_size")) {
|
|
KernelData.WavefronSize = V.second.getUInt();
|
|
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
|
|
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
|
}
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
/// Get the "amdhsa.kernels" element from the msgpack Document
|
|
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
|
|
auto Res = MDN.find("amdhsa.kernels");
|
|
if (Res == MDN.end())
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Could not find amdhsa.kernels key");
|
|
|
|
auto Pair = *Res;
|
|
assert(Pair.second.isArray() &&
|
|
"AMDGPU kernel entries are arrays of entries");
|
|
|
|
return Pair.second.getArray();
|
|
}
|
|
|
|
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
|
|
/// MapDocNode that either maps a string to a single value (most of them) or
|
|
/// to another array of things. Currently, we only handle the case that maps
|
|
/// to scalar value.
|
|
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
|
|
KernelMetaDataTy KernelData;
|
|
std::string KernelName;
|
|
auto Entry = (*It).getMap();
|
|
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
|
|
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
|
|
return Err;
|
|
|
|
KernelInfoMap.insert({KernelName, KernelData});
|
|
return Error::success();
|
|
}
|
|
|
|
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
|
|
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
|
|
auto KernelsOrErr = getAMDKernelsArray(MDN);
|
|
if (auto Err = KernelsOrErr.takeError())
|
|
return Err;
|
|
|
|
auto KernelsArr = *KernelsOrErr;
|
|
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
|
|
if (!It->isMap())
|
|
continue; // we expect <key,value> pairs
|
|
|
|
// Obtain the value for the different entries. Each array entry is a
|
|
// MapDocNode
|
|
if (auto Err = generateKernelInfo(It))
|
|
return Err;
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
// Kernel names are the keys
|
|
StringMap<KernelMetaDataTy> &KernelInfoMap;
|
|
};
|
|
} // namespace
|
|
|
|
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
|
|
/// KernelInfoMap
|
|
inline Error
|
|
readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
|
|
StringMap<KernelMetaDataTy> &KernelInfoMap,
|
|
uint16_t &ELFABIVersion) {
|
|
Error Err = Error::success(); // Used later as out-parameter
|
|
|
|
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
|
|
if (auto Err = ELFOrError.takeError())
|
|
return Err;
|
|
|
|
const object::ELF64LEFile ELFObj = ELFOrError.get();
|
|
ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
|
|
KernelInfoReader Reader(KernelInfoMap);
|
|
|
|
// Read the code object version from ELF image header
|
|
auto Header = ELFObj.getHeader();
|
|
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
|
|
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
|
|
|
|
for (const auto &S : Sections) {
|
|
if (S.sh_type != ELF::SHT_NOTE)
|
|
continue;
|
|
|
|
for (const auto N : ELFObj.notes(S, Err)) {
|
|
if (Err)
|
|
return Err;
|
|
// Fills the KernelInfoTabel entries in the reader
|
|
if ((Err = Reader.processNote(N, S.sh_addralign)))
|
|
return Err;
|
|
}
|
|
}
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
} // namespace utils
|
|
} // namespace plugin
|
|
} // namespace target
|
|
} // namespace omp
|
|
} // namespace llvm
|