Maksim Sabianin 65d730b4a5
[SYCL] Add offload wrapping for SYCL kind (#147508)
This patch adds an Offload Wrapper for the SYCL kind. This is an
essential step for SYCL offloading and the compilation flow. The usage
of offload wrapping is added to the clang-linker-wrapper tool.

Modifications:
Implemented `bundleSYCL()` function to handle SYCL image bundling.
Implemented `wrapSYCLBinaries()` function that is invoked from
clang-linker-wrapper.
SYCL Offload Wrapping uses specific data structures such as
`__sycl.tgt_device_image` and `__sycl.tgt_bin_desc`. Each SYCL image
maintains its own symbol table (unlike shared global tables in other
targets). Therefore, symbols are encoded explicitly during the offload
wrapping. Also, images refer to their own Offloading Entries arrays
unlike other targets.
The proposed `__sycl.tgt_device_image` uses Version 3 to differentiate
from images generated by Intel DPC++. The structure proposed in this
patch doesn't have fields deprecated in DPC++.
2025-09-26 17:02:31 +00:00

461 lines
18 KiB
C++

//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
using namespace llvm::offloading;
/// Returns the struct type named "struct.__tgt_offload_entry" for the context
/// of \p M. The type is looked up by name first and only created when no such
/// named struct exists yet.
StructType *offloading::getEntryTy(Module &M) {
  LLVMContext &Ctx = M.getContext();

  // Reuse the named struct if this context already has one.
  if (StructType *Existing =
          StructType::getTypeByName(Ctx, "struct.__tgt_offload_entry"))
    return Existing;

  Type *I64Ty = Type::getInt64Ty(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);
  Type *I16Ty = Type::getInt16Ty(Ctx);
  Type *OpaquePtrTy = PointerType::getUnqual(Ctx);

  // Field layout: i64, i16, i16, i32, ptr, ptr, i64, i64, ptr.
  return StructType::create("struct.__tgt_offload_entry", I64Ty, I16Ty, I16Ty,
                            I32Ty, OpaquePtrTy, OpaquePtrTy, I64Ty, I64Ty,
                            OpaquePtrTy);
}
/// Builds the constant initializer for one offloading entry together with the
/// global string that holds the entry's name.
///
/// \returns a pair of {entry initializer, name string global}.
std::pair<Constant *, GlobalVariable *>
offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
                                          Constant *Addr, StringRef Name,
                                          uint64_t Size, uint32_t Flags,
                                          uint64_t Data, Constant *AuxAddr) {
  LLVMContext &Ctx = M.getContext();
  const llvm::Triple &TT = M.getTargetTriple();

  Type *OpaquePtrTy = PointerType::getUnqual(Ctx);
  Type *I64Ty = Type::getInt64Ty(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);
  Type *I16Ty = Type::getInt16Ty(Ctx);

  // The name is emitted as a constant string so that the symbol can be looked
  // up in the device. NVPTX gets a '$'-separated symbol name instead of the
  // '.'-separated one used elsewhere.
  Constant *NameInit = ConstantDataArray::getString(Ctx, Name);
  StringRef NameSymbol =
      TT.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
  auto *NameGV =
      new GlobalVariable(M, NameInit->getType(), /*isConstant=*/true,
                         GlobalValue::InternalLinkage, NameInit, NameSymbol);
  NameGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  NameGV->setSection(".llvm.rodata.offloading");
  NameGV->setAlignment(Align(1));

  // Record the string in named metadata so it can be queried from the IR.
  NamedMDNode *SymbolsMD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
  Metadata *SymbolOperands[] = {ConstantAsMetadata::get(NameGV)};
  SymbolsMD->addOperand(llvm::MDNode::get(Ctx, SymbolOperands));

  // The auxiliary address is optional and falls back to a null pointer.
  Constant *AuxField =
      AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr,
                                                               OpaquePtrTy)
              : Constant::getNullValue(OpaquePtrTy);

  // Fields follow the layout of the struct returned by getEntryTy():
  // zero i64, constant 1, kind, flags, address, name, size, data, aux address.
  Constant *Fields[] = {
      Constant::getNullValue(I64Ty),
      ConstantInt::get(I16Ty, 1),
      ConstantInt::get(I16Ty, Kind),
      ConstantInt::get(I32Ty, Flags),
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, OpaquePtrTy),
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(NameGV, OpaquePtrTy),
      ConstantInt::get(I64Ty, Size),
      ConstantInt::get(I64Ty, Data),
      AuxField};

  Constant *Initializer = ConstantStruct::get(getEntryTy(M), Fields);
  return {Initializer, NameGV};
}
/// Creates the global variable for one offloading entry in module \p M and
/// places it in \p SectionName (with a "$OE" suffix on COFF targets).
///
/// \returns the newly created entry global.
GlobalVariable *
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
                                Constant *Addr, StringRef Name, uint64_t Size,
                                uint32_t Flags, uint64_t Data,
                                Constant *AuxAddr, StringRef SectionName) {
  const llvm::Triple &TT = M.getTargetTriple();

  // Only the initializer is needed here; the name global that comes back with
  // it is not used by this function.
  Constant *Initializer =
      getOffloadingEntryInitializer(M, Kind, Addr, Name, Size, Flags, Data,
                                    AuxAddr)
          .first;

  StringRef SymbolPrefix =
      TT.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";

  auto *EntryGV = new GlobalVariable(
      M, getEntryTy(M),
      /*isConstant=*/true, GlobalValue::WeakAnyLinkage, Initializer,
      SymbolPrefix + Name, nullptr, GlobalValue::NotThreadLocal,
      M.getDataLayout().getDefaultGlobalsAddressSpace());

  // The entry has to live in the section the linker expects. COFF appends a
  // "$OE" suffix to the section name.
  if (!TT.isOSBinFormatCOFF())
    EntryGV->setSection(SectionName);
  else
    EntryGV->setSection((SectionName + "$OE").str());

  EntryGV->setAlignment(Align(object::OffloadBinary::getAlignment()));
  return EntryGV;
}
/// Creates the hidden "__start_<section>" / "__stop_<section>" globals that
/// delimit the offloading entries stored in \p SectionName.
///
/// \returns a pair of {begin symbol, end symbol}.
std::pair<GlobalVariable *, GlobalVariable *>
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
  const llvm::Triple &TT = M.getTargetTriple();
  const bool IsCOFF = TT.isOSBinFormatCOFF();

  ArrayType *EmptyArrayTy = ArrayType::get(getEntryTy(M), 0);
  Constant *ZeroInit = ConstantAggregateZero::get(EmptyArrayTy);

  // On COFF the boundary symbols are weak, zero-initialized definitions;
  // everywhere else they are external declarations without an initializer.
  Constant *BoundaryInit = IsCOFF ? ZeroInit : nullptr;
  GlobalValue::LinkageTypes BoundaryLinkage =
      IsCOFF ? GlobalValue::WeakODRLinkage : GlobalValue::ExternalLinkage;

  const auto MakeBoundary = [&](const Twine &SymbolName) {
    auto *GV = new GlobalVariable(M, EmptyArrayTy, /*isConstant=*/true,
                                  BoundaryLinkage, BoundaryInit, SymbolName);
    GV->setVisibility(GlobalValue::HiddenVisibility);
    return GV;
  };

  GlobalVariable *BeginGV = MakeBoundary("__start_" + SectionName);
  GlobalVariable *EndGV = MakeBoundary("__stop_" + SectionName);

  if (TT.isOSBinFormatELF()) {
    // The external begin/end symbols created above are assumed to be defined
    // by the linker, which happens whenever a section whose name is a valid
    // C identifier is present. Emit a dummy variable into that section to
    // force the linker to always provide these symbols.
    auto *Placeholder = new GlobalVariable(
        M, EmptyArrayTy, /*isConstant=*/true, GlobalVariable::InternalLinkage,
        ZeroInit, "__dummy." + SectionName);
    Placeholder->setSection(SectionName);
    Placeholder->setAlignment(Align(object::OffloadBinary::getAlignment()));
    appendToCompilerUsed(M, Placeholder);
  } else {
    // The COFF linker merges sections containing a '$' into a single section
    // and sorts their contents alphabetically by the characters following the
    // '$'. Choose suffixes that sort the begin symbol first and the end
    // symbol last.
    BeginGV->setSection((SectionName + "$OA").str());
    EndGV->setSection((SectionName + "$OZ").str());
  }

  return {BeginGV, EndGV};
}
/// Checks whether an AMDGPU image can run in the given environment.
///
/// \param ImageArch   processor name the image was built for.
/// \param ImageFlags  ELF header flags of the image; only the V4 xnack and
///                    sramecc feature bits are inspected.
/// \param EnvTargetID target ID of the environment; the part before the first
///                    ':' is compared against \p ImageArch.
/// \returns true if the processors match and every feature the image
///          explicitly requests on or off is spelled the same way in
///          \p EnvTargetID.
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
                                                        uint32_t ImageFlags,
                                                        StringRef EnvTargetID) {
  using namespace llvm::ELF;

  // The base processors have to match before anything else is considered.
  if (EnvTargetID.split(":").first != ImageArch)
    return false;

  // An image that pins xnack on or off needs the same setting in the
  // environment; "unsupported" and "any" impose no requirement.
  const uint32_t XnackMode = ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4;
  if (XnackMode == EF_AMDGPU_FEATURE_XNACK_OFF_V4 &&
      !EnvTargetID.contains("xnack-"))
    return false;
  if (XnackMode == EF_AMDGPU_FEATURE_XNACK_ON_V4 &&
      !EnvTargetID.contains("xnack+"))
    return false;

  // The same rule applies to sramecc.
  const uint32_t SrameccMode = ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4;
  if (SrameccMode == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 &&
      !EnvTargetID.contains("sramecc-"))
    return false;
  if (SrameccMode == EF_AMDGPU_FEATURE_SRAMECC_ON_V4 &&
      !EnvTargetID.contains("sramecc+"))
    return false;

  return true;
}
namespace {
/// Reads the AMDGPU specific per-kernel-metadata from an image.
class KernelInfoReader {
public:
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
: KernelInfoMap(KIM) {}
/// Process ELF note to read AMDGPU metadata from respective information
/// fields.
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
if (Note.getName() != "AMDGPU")
return Error::success(); // We are not interested in other things
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
"Parse AMDGPU MetaData");
auto Desc = Note.getDesc(Align);
StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return Error::success();
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
if (!Verifier.verify(MsgPackDoc.getRoot()))
return Error::success();
auto RootMap = MsgPackDoc.getRoot().getMap(true);
if (auto Err = iterateAMDKernels(RootMap))
return Err;
return Error::success();
}
private:
/// Extracts the relevant information via simple string look-up in the msgpack
/// document elements.
Error
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
std::string &KernelName,
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
if (!V.first.isString())
return Error::success();
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
return DK.getString() == SK;
};
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
uint32_t *Vals) {
assert(DN.isArray() && "MsgPack DocNode is an array node");
auto DNA = DN.getArray();
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
int I = 0;
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
++DNABegin) {
Vals[I++] = DNABegin->getUInt();
}
};
if (IsKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (IsKey(V.first, ".sgpr_count")) {
KernelData.SGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".sgpr_spill_count")) {
KernelData.SGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_count")) {
KernelData.VGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_spill_count")) {
KernelData.VGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".agpr_count")) {
KernelData.AGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
} else if (IsKey(V.first, ".workgroup_size_hint")) {
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
} else if (IsKey(V.first, ".wavefront_size")) {
KernelData.WavefrontSize = V.second.getUInt();
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}
return Error::success();
}
/// Get the "amdhsa.kernels" element from the msgpack Document
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
auto Res = MDN.find("amdhsa.kernels");
if (Res == MDN.end())
return createStringError(inconvertibleErrorCode(),
"Could not find amdhsa.kernels key");
auto Pair = *Res;
assert(Pair.second.isArray() &&
"AMDGPU kernel entries are arrays of entries");
return Pair.second.getArray();
}
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
/// MapDocNode that either maps a string to a single value (most of them) or
/// to another array of things. Currently, we only handle the case that maps
/// to scalar value.
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
std::string KernelName;
auto Entry = (*It).getMap();
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
return Err;
KernelInfoMap.insert({KernelName, KernelData});
return Error::success();
}
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
auto KernelsOrErr = getAMDKernelsArray(MDN);
if (auto Err = KernelsOrErr.takeError())
return Err;
auto KernelsArr = *KernelsOrErr;
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
if (!It->isMap())
continue; // we expect <key,value> pairs
// Obtain the value for the different entries. Each array entry is a
// MapDocNode
if (auto Err = generateKernelInfo(It))
return Err;
}
return Error::success();
}
// Kernel names are the keys
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
};
} // namespace
/// Reads the AMDGPU kernel metadata and the ELF ABI version from an image.
///
/// \param MemBuffer      buffer holding the image, parsed as a 64-bit
///                       little-endian ELF file.
/// \param KernelInfoMap  [out] receives one entry per kernel found in the
///                       image's note sections, keyed by kernel name.
/// \param ELFABIVersion  [out] receives the EI_ABIVERSION byte of the ELF
///                       identification.
/// \returns an error if the buffer cannot be parsed as ELF, its sections
///          cannot be enumerated, a note section is malformed, or processing
///          a note fails; success otherwise.
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
    MemoryBufferRef MemBuffer,
    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
    uint16_t &ELFABIVersion) {
  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
  if (Error CreateErr = ELFOrError.takeError())
    return CreateErr;

  const object::ELF64LEFile ELFObj = ELFOrError.get();

  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
  if (!Sections)
    return Sections.takeError();

  KernelInfoReader Reader(KernelInfoMap);

  // Read the code object version from ELF image header
  auto Header = ELFObj.getHeader();
  ELFABIVersion = static_cast<uint8_t>(Header.e_ident[ELF::EI_ABIVERSION]);

  for (const auto &S : *Sections) {
    if (S.sh_type != ELF::SHT_NOTE)
      continue;

    // Out-parameter for the fallible note iteration. It is scoped to a single
    // section so that no unchecked Error object is alive on any other return
    // path of this function.
    Error NoteErr = Error::success();
    for (const auto N : ELFObj.notes(S, NoteErr)) {
      // The iterator writes to NoteErr on every advance, so it has to be
      // checked before the next increment.
      if (NoteErr)
        return NoteErr;

      // Fills the KernelInfoMap entries in the reader
      if (Error ProcessErr = Reader.processNote(N, S.sh_addralign))
        return ProcessErr;
    }

    // The iterator reports a malformed note by stopping the loop and leaving
    // the failure in NoteErr, so it must be checked once more after the loop
    // (this also covers sections for which the loop body never ran).
    if (NoteErr)
      return NoteErr;
  }

  return Error::success();
}
/// Wraps the image held in \p Img into an ELF container.
///
/// The container is a 64-bit little-endian ELF (type ET_DYN, machine
/// EM_INTELGT) with two sections: a note section
/// ".note.inteloneompoffload" carrying three "INTELONEOMPOFFLOAD" notes
/// (version, auxiliary image information, image count) and a PROGBITS section
/// "__openmp_offload_spirv_0" holding the original image bytes.
///
/// \param Img [in,out] the image to wrap; replaced by the ELF container on
///            success and left untouched on failure.
/// \returns an error carrying every message reported while emitting the ELF
///          file, or success.
Error offloading::intel::containerizeOpenMPSPIRVImage(
    std::unique_ptr<MemoryBuffer> &Img) {
  constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_VERSION = 1;
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT = 2;
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX = 3;

  // Start creating notes for the ELF container.
  std::vector<ELFYAML::NoteEntry> Notes;
  std::string Version = toHex(INTEL_ONEOMP_OFFLOAD_VERSION);
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(Version),
                                        NT_INTEL_ONEOMP_OFFLOAD_VERSION});

  // The AuxInfo string will hold auxiliary information for the image.
  // ELFYAML::NoteEntry structures will hold references to the
  // string, so we have to make sure the string is valid.
  std::string AuxInfo;

  // TODO: Pass compile/link opts
  StringRef CompileOpts = "";
  StringRef LinkOpts = "";

  unsigned ImageFmt = 1; // SPIR-V format

  // NUL-separated fields: "0", image format, compile options, link options.
  AuxInfo = toHex((Twine(0) + Twine('\0') + Twine(ImageFmt) + Twine('\0') +
                   CompileOpts + Twine('\0') + LinkOpts)
                      .str());
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(AuxInfo),
                                        NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX});

  std::string ImgCount = toHex(Twine(1).str()); // always one image per ELF
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(ImgCount),
                                        NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT});

  std::string YamlFile;
  llvm::raw_string_ostream YamlFileStream(YamlFile);

  // Write the YAML template file.

  // We use 64-bit little-endian ELF currently.
  ELFYAML::FileHeader Header{};
  Header.Class = ELF::ELFCLASS64;
  Header.Data = ELF::ELFDATA2LSB;
  Header.Type = ELF::ET_DYN;
  Header.Machine = ELF::EM_INTELGT;

  // Create a section with notes.
  ELFYAML::NoteSection Section{};
  Section.Type = ELF::SHT_NOTE;
  Section.AddressAlign = 0;
  Section.Name = ".note.inteloneompoffload";
  Section.Notes.emplace(std::move(Notes));

  ELFYAML::Object Object{};
  Object.Header = Header;
  Object.Chunks.push_back(
      std::make_unique<ELFYAML::NoteSection>(std::move(Section)));

  // Create the section that will hold the image
  ELFYAML::RawContentSection ImageSection{};
  ImageSection.Type = ELF::SHT_PROGBITS;
  ImageSection.AddressAlign = 0;
  std::string Name = "__openmp_offload_spirv_0";
  ImageSection.Name = Name;
  ImageSection.Content =
      llvm::yaml::BinaryRef(arrayRefFromStringRef(Img->getBuffer()));
  Object.Chunks.push_back(
      std::make_unique<ELFYAML::RawContentSection>(std::move(ImageSection)));

  Error Err = Error::success();
  llvm::yaml::yaml2elf(
      Object, YamlFileStream,
      [&Err](const Twine &Msg) {
        // Accumulate instead of assigning: plain assignment would overwrite an
        // Error that has not been checked yet and would drop any earlier
        // message if the handler is invoked more than once.
        Err = joinErrors(std::move(Err), createStringError(Msg));
      },
      UINT64_MAX);
  if (Err)
    return Err;

  Img = MemoryBuffer::getMemBufferCopy(YamlFile);
  return Error::success();
}