llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp

//===- Utility.cpp ------ Collection of generic offloading utilities ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;
using namespace llvm::offloading;

StructType *offloading::getEntryTy(Module &M) {
  LLVMContext &C = M.getContext();
  StructType *EntryTy =
      StructType::getTypeByName(C, "struct.__tgt_offload_entry");
  if (!EntryTy)
    EntryTy = StructType::create(
        "struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C),
        Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C),
        PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C),
        PointerType::getUnqual(C));
  return EntryTy;
}

std::pair<Constant *, GlobalVariable *>
offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
                                          Constant *Addr, StringRef Name,
                                          uint64_t Size, uint32_t Flags,
                                          uint64_t Data, Constant *AuxAddr) {
  const llvm::Triple &Triple = M.getTargetTriple();
  Type *PtrTy = PointerType::getUnqual(M.getContext());
  Type *Int64Ty = Type::getInt64Ty(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *Int16Ty = Type::getInt16Ty(M.getContext());

  Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);

  StringRef Prefix =
      Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";

  // Create the constant string used to look up the symbol in the device.
  auto *Str =
      new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
                         GlobalValue::InternalLinkage, AddrName, Prefix);
  StringRef SectionName = ".llvm.rodata.offloading";
  Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  Str->setSection(SectionName);
  Str->setAlignment(Align(1));

  // Make a metadata node for these constants so it can be queried from IR.
  NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
  Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
  MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));

  // Construct the offloading entry.
  Constant *EntryData[] = {
      ConstantExpr::getNullValue(Int64Ty),
      ConstantInt::get(Int16Ty, 1),
      ConstantInt::get(Int16Ty, Kind),
      ConstantInt::get(Int32Ty, Flags),
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy),
      ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy),
      ConstantInt::get(Int64Ty, Size),
      ConstantInt::get(Int64Ty, Data),
      AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy)
              : ConstantExpr::getNullValue(PtrTy)};
  Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
  return {EntryInitializer, Str};
}

void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
                                     Constant *Addr, StringRef Name,
                                     uint64_t Size, uint32_t Flags,
                                     uint64_t Data, Constant *AuxAddr,
                                     StringRef SectionName) {
  const llvm::Triple &Triple = M.getTargetTriple();

  auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
      M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);

  StringRef Prefix =
      Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
  auto *Entry = new GlobalVariable(
      M, getEntryTy(M),
      /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
      Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
      M.getDataLayout().getDefaultGlobalsAddressSpace());

  // The entry has to be created in the section the linker expects it to be.
  if (Triple.isOSBinFormatCOFF())
    Entry->setSection((SectionName + "$OE").str());
  else
    Entry->setSection(SectionName);
  Entry->setAlignment(Align(object::OffloadBinary::getAlignment()));
}

std::pair<GlobalVariable *, GlobalVariable *>
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
  const llvm::Triple &Triple = M.getTargetTriple();

  auto *ZeroInitilaizer =
      ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
  auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
  auto *EntryType = ArrayType::get(getEntryTy(M), 0);
  auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
                                            : GlobalValue::ExternalLinkage;

  auto *EntriesB =
      new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
                         "__start_" + SectionName);
  EntriesB->setVisibility(GlobalValue::HiddenVisibility);
  auto *EntriesE =
      new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
                         "__stop_" + SectionName);
  EntriesE->setVisibility(GlobalValue::HiddenVisibility);

  if (Triple.isOSBinFormatELF()) {
    // We assume that external begin/end symbols that we have created above will
    // be defined by the linker. This is done whenever a section name with a
    // valid C-identifier is present. We define a dummy variable here to force
    // the linker to always provide these symbols.
    auto *DummyEntry = new GlobalVariable(
        M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
        ZeroInitilaizer, "__dummy." + SectionName);
    DummyEntry->setSection(SectionName);
    DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
    appendToCompilerUsed(M, DummyEntry);
  } else {
    // The COFF linker will merge sections containing a '$' together into a
    // single section. The order of entries in this section will be sorted
    // alphabetically by the characters following the '$' in the name. Set the
    // sections here to ensure that the beginning and end symbols are sorted.
    EntriesB->setSection((SectionName + "$OA").str());
    EntriesE->setSection((SectionName + "$OZ").str());
  }

  return std::make_pair(EntriesB, EntriesE);
}

bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
                                                        uint32_t ImageFlags,
                                                        StringRef EnvTargetID) {
  using namespace llvm::ELF;
  StringRef EnvArch = EnvTargetID.split(":").first;

  // Trivial check if the base processors match.
  if (EnvArch != ImageArch)
    return false;

  // Check if the image is requesting xnack on or off.
  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
    // The image is 'xnack-' so the environment must be 'xnack-'.
    if (!EnvTargetID.contains("xnack-"))
      return false;
    break;
  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
    // The image is 'xnack+' so the environment must be 'xnack+'.
    if (!EnvTargetID.contains("xnack+"))
      return false;
    break;
  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
  default:
    break;
  }

  // Check if the image is requesting sramecc on or off.
  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
    // The image is 'sramecc-' so the environment must be 'sramecc-'.
    if (!EnvTargetID.contains("sramecc-"))
      return false;
    break;
  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
    // The image is 'sramecc+' so the environment must be 'sramecc+'.
    if (!EnvTargetID.contains("sramecc+"))
      return false;
    break;
  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
    break;
  }

  return true;
}

namespace {
/// Reads the AMDGPU specific per-kernel-metadata from an image.
class KernelInfoReader {
public:
  KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
      : KernelInfoMap(KIM) {}

  /// Process ELF note to read AMDGPU metadata from respective information
  /// fields.
  Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
    if (Note.getName() != "AMDGPU")
      return Error::success(); // We are not interested in other things

    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
           "Parse AMDGPU MetaData");
    auto Desc = Note.getDesc(Align);
    StringRef MsgPackString =
        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
    msgpack::Document MsgPackDoc;
    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
      return Error::success();

    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
    if (!Verifier.verify(MsgPackDoc.getRoot()))
      return Error::success();

    auto RootMap = MsgPackDoc.getRoot().getMap(true);

    if (auto Err = iterateAMDKernels(RootMap))
      return Err;

    return Error::success();
  }

private:
  /// Extracts the relevant information via simple string look-up in the msgpack
  /// document elements.
  Error
  extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
                    std::string &KernelName,
                    offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
    if (!V.first.isString())
      return Error::success();

    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
      return DK.getString() == SK;
    };

    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
                                           uint32_t *Vals) {
      assert(DN.isArray() && "MsgPack DocNode is an array node");
      auto DNA = DN.getArray();
      assert(DNA.size() == 3 && "ArrayNode has at most three elements");

      int I = 0;
      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
           ++DNABegin) {
        Vals[I++] = DNABegin->getUInt();
      }
    };

    if (IsKey(V.first, ".name")) {
      KernelName = V.second.toString();
    } else if (IsKey(V.first, ".sgpr_count")) {
      KernelData.SGPRCount = V.second.getUInt();
    } else if (IsKey(V.first, ".sgpr_spill_count")) {
      KernelData.SGPRSpillCount = V.second.getUInt();
    } else if (IsKey(V.first, ".vgpr_count")) {
      KernelData.VGPRCount = V.second.getUInt();
    } else if (IsKey(V.first, ".vgpr_spill_count")) {
      KernelData.VGPRSpillCount = V.second.getUInt();
    } else if (IsKey(V.first, ".agpr_count")) {
      KernelData.AGPRCount = V.second.getUInt();
    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
      KernelData.PrivateSegmentSize = V.second.getUInt();
    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
      KernelData.GroupSegmentList = V.second.getUInt();
    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
    } else if (IsKey(V.first, ".workgroup_size_hint")) {
      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
    } else if (IsKey(V.first, ".wavefront_size")) {
      KernelData.WavefrontSize = V.second.getUInt();
    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
    }

    return Error::success();
  }

  /// Get the "amdhsa.kernels" element from the msgpack Document
  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
    auto Res = MDN.find("amdhsa.kernels");
    if (Res == MDN.end())
      return createStringError(inconvertibleErrorCode(),
                               "Could not find amdhsa.kernels key");

    auto Pair = *Res;
    assert(Pair.second.isArray() &&
           "AMDGPU kernel entries are arrays of entries");

    return Pair.second.getArray();
  }

  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
  /// MapDocNode that either maps a string to a single value (most of them) or
  /// to another array of things. Currently, we only handle the case that maps
  /// to scalar value.
  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
    offloading::amdgpu::AMDGPUKernelMetaData KernelData;
    std::string KernelName;
    auto Entry = (*It).getMap();
    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
        return Err;

    KernelInfoMap.insert({KernelName, KernelData});
    return Error::success();
  }

  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
    auto KernelsOrErr = getAMDKernelsArray(MDN);
    if (auto Err = KernelsOrErr.takeError())
      return Err;

    auto KernelsArr = *KernelsOrErr;
    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
      if (!It->isMap())
        continue; // we expect <key,value> pairs

      // Obtain the value for the different entries. Each array entry is a
      // MapDocNode
      if (auto Err = generateKernelInfo(It))
        return Err;
    }
    return Error::success();
  }

  // Kernel names are the keys
  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
};
} // namespace

Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
    MemoryBufferRef MemBuffer,
    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
    uint16_t &ELFABIVersion) {
  Error Err = Error::success(); // Used later as out-parameter

  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
  if (auto Err = ELFOrError.takeError())
    return Err;

  const object::ELF64LEFile ELFObj = ELFOrError.get();
  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
  if (!Sections)
    return Sections.takeError();
  KernelInfoReader Reader(KernelInfoMap);

  // Read the code object version from ELF image header
  auto Header = ELFObj.getHeader();
  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
  for (const auto &S : *Sections) {
    if (S.sh_type != ELF::SHT_NOTE)
      continue;

    for (const auto N : ELFObj.notes(S, Err)) {
      if (Err)
        return Err;
      // Fills the KernelInfoTabel entries in the reader
      if ((Err = Reader.processNote(N, S.sh_addralign)))
        return Err;
    }
  }
  return Error::success();
}
Error offloading::intel::containerizeOpenMPSPIRVImage(
    std::unique_ptr<MemoryBuffer> &Img) {
  constexpr char INTEL_ONEOMP_OFFLOAD_VERSION[] = "1.0";
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_VERSION = 1;
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT = 2;
  constexpr int NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX = 3;

  // Start creating notes for the ELF container.
  std::vector<ELFYAML::NoteEntry> Notes;
  std::string Version = toHex(INTEL_ONEOMP_OFFLOAD_VERSION);
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(Version),
                                        NT_INTEL_ONEOMP_OFFLOAD_VERSION});

  // The AuxInfo string will hold auxiliary information for the image.
  // ELFYAML::NoteEntry structures will hold references to the
  // string, so we have to make sure the string is valid.
  std::string AuxInfo;

  // TODO: Pass compile/link opts
  StringRef CompileOpts = "";
  StringRef LinkOpts = "";

  unsigned ImageFmt = 1; // SPIR-V format

  AuxInfo = toHex((Twine(0) + Twine('\0') + Twine(ImageFmt) + Twine('\0') +
                   CompileOpts + Twine('\0') + LinkOpts)
                      .str());
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(AuxInfo),
                                        NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX});

  std::string ImgCount = toHex(Twine(1).str()); // always one image per ELF
  Notes.emplace_back(ELFYAML::NoteEntry{"INTELONEOMPOFFLOAD",
                                        yaml::BinaryRef(ImgCount),
                                        NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT});

  std::string YamlFile;
  llvm::raw_string_ostream YamlFileStream(YamlFile);

  // Write the YAML template file.

  // We use 64-bit little-endian ELF currently.
  ELFYAML::FileHeader Header{};
  Header.Class = ELF::ELFCLASS64;
  Header.Data = ELF::ELFDATA2LSB;
  Header.Type = ELF::ET_DYN;
  // Use an existing Intel machine type as there is not one specifically for
  // Intel GPUs.
  Header.Machine = ELF::EM_IA_64;

  // Create a section with notes.
  ELFYAML::NoteSection Section{};
  Section.Type = ELF::SHT_NOTE;
  Section.AddressAlign = 0;
  Section.Name = ".note.inteloneompoffload";
  Section.Notes.emplace(std::move(Notes));

  ELFYAML::Object Object{};
  Object.Header = Header;
  Object.Chunks.push_back(
      std::make_unique<ELFYAML::NoteSection>(std::move(Section)));

  // Create the section that will hold the image
  ELFYAML::RawContentSection ImageSection{};
  ImageSection.Type = ELF::SHT_PROGBITS;
  ImageSection.AddressAlign = 0;
  std::string Name = "__openmp_offload_spirv_0";
  ImageSection.Name = Name;
  ImageSection.Content =
      llvm::yaml::BinaryRef(arrayRefFromStringRef(Img->getBuffer()));
  Object.Chunks.push_back(
      std::make_unique<ELFYAML::RawContentSection>(std::move(ImageSection)));
  Error Err = Error::success();
  llvm::yaml::yaml2elf(
      Object, YamlFileStream,
      [&Err](const Twine &Msg) { Err = createStringError(Msg); }, UINT64_MAX);
  if (Err)
    return Err;

  Img = MemoryBuffer::getMemBufferCopy(YamlFile);
  return Error::success();
}