Compiling code for AMD GPUs requires knowledge of which chipset is being targeted, especially if the code uses chipset-specific intrinsics (which is the case in a downstream convolution generator). This commit adds `target`, `chipset` and `features` arguments to the SerializeToHsaco constructor to enable passing in this required information. It also amends the ROCm integration tests to pass in the target chipset, which is set to the chipset of the first GPU on the system executing the tests. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D114107
289 lines
10 KiB
C++
289 lines
10 KiB
C++
//===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a pass that serializes a gpu module into HSAco blob and
|
|
// adds that blob as a string attribute of the module.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "mlir/Dialect/GPU/Passes.h"
|
|
|
|
#if MLIR_GPU_TO_HSACO_PASS_ENABLE
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Support/FileUtilities.h"
|
|
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
|
|
#include "mlir/Target/LLVMIR/Export.h"
|
|
|
|
#include "llvm/MC/MCAsmBackend.h"
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
#include "llvm/MC/MCCodeEmitter.h"
|
|
#include "llvm/MC/MCContext.h"
|
|
#include "llvm/MC/MCObjectFileInfo.h"
|
|
#include "llvm/MC/MCObjectWriter.h"
|
|
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
|
|
#include "llvm/MC/MCStreamer.h"
|
|
#include "llvm/MC/MCSubtargetInfo.h"
|
|
|
|
#include "llvm/MC/TargetRegistry.h"
|
|
#include "llvm/Support/FileUtilities.h"
|
|
#include "llvm/Support/LineIterator.h"
|
|
#include "llvm/Support/Program.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Support/WithColor.h"
|
|
#include "llvm/Target/TargetOptions.h"
|
|
|
|
#include "lld/Common/Driver.h"
|
|
|
|
#include "hip/hip_version.h"
|
|
|
|
#include <mutex>
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
class SerializeToHsacoPass
|
|
: public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
|
|
public:
|
|
SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features);
|
|
StringRef getArgument() const override { return "gpu-to-hsaco"; }
|
|
StringRef getDescription() const override {
|
|
return "Lower GPU kernel function to HSACO binary annotations";
|
|
}
|
|
|
|
private:
|
|
void getDependentDialects(DialectRegistry ®istry) const override;
|
|
|
|
// Serializes ROCDL to HSACO.
|
|
std::unique_ptr<std::vector<char>>
|
|
serializeISA(const std::string &isa) override;
|
|
|
|
std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
|
|
std::unique_ptr<std::vector<char>>
|
|
createHsaco(const SmallVectorImpl<char> &isaBinary);
|
|
};
|
|
} // namespace
|
|
|
|
static std::string getDefaultChip() {
|
|
const char kDefaultChip[] = "gfx900";
|
|
|
|
// Locate rocm_agent_enumerator.
|
|
const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
|
|
llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
|
|
kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
|
|
if (!rocmAgentEnumerator) {
|
|
llvm::WithColor::warning(llvm::errs())
|
|
<< kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__
|
|
<< "/bin\n";
|
|
return kDefaultChip;
|
|
}
|
|
|
|
// Prepare temp file to hold the outputs.
|
|
int tempFd = -1;
|
|
SmallString<128> tempFilename;
|
|
if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd,
|
|
tempFilename)) {
|
|
llvm::WithColor::warning(llvm::errs())
|
|
<< "temporary file for " << kRocmAgentEnumerator << " creation error\n";
|
|
return kDefaultChip;
|
|
}
|
|
llvm::FileRemover cleanup(tempFilename);
|
|
|
|
// Invoke rocm_agent_enumerator.
|
|
std::string errorMessage;
|
|
SmallVector<StringRef, 2> args{"-t", "GPU"};
|
|
Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
|
|
int result =
|
|
llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
|
|
redirects, 0, 0, &errorMessage);
|
|
if (result) {
|
|
llvm::WithColor::warning(llvm::errs())
|
|
<< kRocmAgentEnumerator << " invocation error: " << errorMessage
|
|
<< "\n";
|
|
return kDefaultChip;
|
|
}
|
|
|
|
// Load and parse the result.
|
|
auto gfxIsaList = openInputFile(tempFilename);
|
|
if (!gfxIsaList) {
|
|
llvm::WithColor::error(llvm::errs())
|
|
<< "read ROCm agent list temp file error\n";
|
|
return kDefaultChip;
|
|
}
|
|
for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
|
|
// Skip the line with content "gfx000".
|
|
if (*lines == "gfx000")
|
|
continue;
|
|
// Use the first ISA version found.
|
|
return lines->str();
|
|
}
|
|
|
|
return kDefaultChip;
|
|
}
|
|
|
|
// Sets the 'option' to 'value' unless it already has a value.
|
|
static void maybeSetOption(Pass::Option<std::string> &option,
|
|
function_ref<std::string()> getValue) {
|
|
if (!option.hasValue())
|
|
option = getValue();
|
|
}
|
|
|
|
/// Fills in the `triple`, `chip` and `features` pass options from the
/// constructor arguments. Each option is only written when it does not already
/// hold a value.
SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch,
                                           StringRef features) {
  // Inside the lambdas the unqualified names refer to the constructor
  // parameters; the pass options are reached through `this->`.
  maybeSetOption(this->triple, [triple] { return triple.str(); });
  maybeSetOption(this->chip, [arch] { return arch.str(); });
  maybeSetOption(this->features, [features] { return features.str(); });
}
|
|
|
|
void SerializeToHsacoPass::getDependentDialects(
|
|
DialectRegistry ®istry) const {
|
|
registerROCDLDialectTranslation(registry);
|
|
gpu::SerializeToBlobPass::getDependentDialects(registry);
|
|
}
|
|
|
|
std::unique_ptr<SmallVectorImpl<char>>
|
|
SerializeToHsacoPass::assembleIsa(const std::string &isa) {
|
|
auto loc = getOperation().getLoc();
|
|
|
|
SmallVector<char, 0> result;
|
|
llvm::raw_svector_ostream os(result);
|
|
|
|
llvm::Triple triple(llvm::Triple::normalize(this->triple));
|
|
std::string error;
|
|
const llvm::Target *target =
|
|
llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
|
|
if (!target) {
|
|
emitError(loc, Twine("failed to lookup target: ") + error);
|
|
return {};
|
|
}
|
|
|
|
llvm::SourceMgr srcMgr;
|
|
srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
|
|
llvm::SMLoc());
|
|
|
|
const llvm::MCTargetOptions mcOptions;
|
|
std::unique_ptr<llvm::MCRegisterInfo> mri(
|
|
target->createMCRegInfo(this->triple));
|
|
std::unique_ptr<llvm::MCAsmInfo> mai(
|
|
target->createMCAsmInfo(*mri, this->triple, mcOptions));
|
|
mai->setRelaxELFRelocations(true);
|
|
|
|
llvm::MCContext ctx(triple, mai.get(), mri.get(), &srcMgr, &mcOptions);
|
|
std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
|
|
ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
|
|
ctx.setObjectFileInfo(mofi.get());
|
|
|
|
SmallString<128> cwd;
|
|
if (!llvm::sys::fs::current_path(cwd))
|
|
ctx.setCompilationDir(cwd);
|
|
|
|
std::unique_ptr<llvm::MCStreamer> mcStreamer;
|
|
std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
|
|
std::unique_ptr<llvm::MCSubtargetInfo> sti(
|
|
target->createMCSubtargetInfo(this->triple, this->chip, this->features));
|
|
|
|
llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
|
|
llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
|
|
mcStreamer.reset(target->createMCObjectStreamer(
|
|
triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
|
|
mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
|
|
*sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
|
|
/*DWARFMustBeAtTheEnd*/ false));
|
|
mcStreamer->setUseAssemblerInfoForParsing(true);
|
|
|
|
std::unique_ptr<llvm::MCAsmParser> parser(
|
|
createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
|
|
std::unique_ptr<llvm::MCTargetAsmParser> tap(
|
|
target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
|
|
|
|
if (!tap) {
|
|
emitError(loc, "assembler initialization error");
|
|
return {};
|
|
}
|
|
|
|
parser->setTargetParser(*tap);
|
|
parser->Run(false);
|
|
|
|
return std::make_unique<SmallVector<char, 0>>(std::move(result));
|
|
}
|
|
|
|
std::unique_ptr<std::vector<char>>
|
|
SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
|
|
auto loc = getOperation().getLoc();
|
|
|
|
// Save the ISA binary to a temp file.
|
|
int tempIsaBinaryFd = -1;
|
|
SmallString<128> tempIsaBinaryFilename;
|
|
if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
|
|
tempIsaBinaryFilename)) {
|
|
emitError(loc, "temporary file for ISA binary creation error");
|
|
return {};
|
|
}
|
|
llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
|
|
llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
|
|
tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
|
|
tempIsaBinaryOs.close();
|
|
|
|
// Create a temp file for HSA code object.
|
|
int tempHsacoFD = -1;
|
|
SmallString<128> tempHsacoFilename;
|
|
if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
|
|
tempHsacoFilename)) {
|
|
emitError(loc, "temporary file for HSA code object creation error");
|
|
return {};
|
|
}
|
|
llvm::FileRemover cleanupHsaco(tempHsacoFilename);
|
|
|
|
{
|
|
static std::mutex mutex;
|
|
const std::lock_guard<std::mutex> lock(mutex);
|
|
// Invoke lld. Expect a true return value from lld.
|
|
if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
|
|
"-o", tempHsacoFilename.c_str()},
|
|
/*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
|
|
emitError(loc, "lld invocation error");
|
|
return {};
|
|
}
|
|
}
|
|
|
|
// Load the HSA code object.
|
|
auto hsacoFile = openInputFile(tempHsacoFilename);
|
|
if (!hsacoFile) {
|
|
emitError(loc, "read HSA code object from temp file error");
|
|
return {};
|
|
}
|
|
|
|
StringRef buffer = hsacoFile->getBuffer();
|
|
return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
|
|
}
|
|
|
|
std::unique_ptr<std::vector<char>>
|
|
SerializeToHsacoPass::serializeISA(const std::string &isa) {
|
|
auto isaBinary = assembleIsa(isa);
|
|
if (!isaBinary)
|
|
return {};
|
|
return createHsaco(*isaBinary);
|
|
}
|
|
|
|
// Register pass to serialize GPU kernel functions to a HSACO binary annotation.
|
|
void mlir::registerGpuSerializeToHsacoPass() {
|
|
PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
|
|
[] {
|
|
// Initialize LLVM AMDGPU backend.
|
|
LLVMInitializeAMDGPUAsmParser();
|
|
LLVMInitializeAMDGPUAsmPrinter();
|
|
LLVMInitializeAMDGPUTarget();
|
|
LLVMInitializeAMDGPUTargetInfo();
|
|
LLVMInitializeAMDGPUTargetMC();
|
|
|
|
return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "",
|
|
"");
|
|
});
|
|
}
|
|
#else // MLIR_GPU_TO_HSACO_PASS_ENABLE
|
|
// Stub used when MLIR_GPU_TO_HSACO_PASS_ENABLE is off: nothing is registered.
void mlir::registerGpuSerializeToHsacoPass() {
}
|
|
#endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
|