417 lines
16 KiB
C++
417 lines
16 KiB
C++
//===- Target.cpp - MLIR LLVM XeVM target compilation -----------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This files defines XeVM target related functions including registration
|
|
// calls for the `#xevm.target` compilation attribute.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Target/LLVM/XeVM/Target.h"
|
|
|
|
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
|
|
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
|
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
|
|
#include "mlir/IR/BuiltinAttributeInterfaces.h"
|
|
#include "mlir/IR/BuiltinDialect.h"
|
|
#include "mlir/IR/BuiltinTypes.h"
|
|
#include "mlir/IR/DialectResourceBlobManager.h"
|
|
#include "mlir/Target/LLVM/XeVM/Utils.h"
|
|
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
|
|
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
|
|
#include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
|
|
#include "mlir/Target/LLVMIR/Export.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
#include "llvm/Bitcode/BitcodeWriter.h"
|
|
#include "llvm/Config/Targets.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/FileUtilities.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Process.h"
|
|
#include "llvm/Support/Program.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <cstdint>
|
|
#include <cstdlib>
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::xevm;
|
|
|
|
namespace {
|
|
// XeVM implementation of the gpu:TargetAttrInterface.
|
|
class XeVMTargetAttrImpl
|
|
: public gpu::TargetAttrInterface::FallbackModel<XeVMTargetAttrImpl> {
|
|
public:
|
|
std::optional<mlir::gpu::SerializedObject>
|
|
serializeToObject(Attribute attribute, Operation *module,
|
|
const gpu::TargetOptions &options) const;
|
|
|
|
Attribute createObject(Attribute attribute, Operation *module,
|
|
const mlir::gpu::SerializedObject &object,
|
|
const gpu::TargetOptions &options) const;
|
|
};
|
|
} // namespace
|
|
|
|
void mlir::xevm::registerXeVMTargetInterfaceExternalModels(
|
|
DialectRegistry ®istry) {
|
|
registry.addExtension(+[](MLIRContext *ctx, XeVMDialect *dialect) {
|
|
XeVMTargetAttr::attachInterface<XeVMTargetAttrImpl>(*ctx);
|
|
});
|
|
}
|
|
|
|
void mlir::xevm::registerXeVMTargetInterfaceExternalModels(
|
|
MLIRContext &context) {
|
|
DialectRegistry registry;
|
|
registerXeVMTargetInterfaceExternalModels(registry);
|
|
context.appendDialectRegistry(registry);
|
|
}
|
|
|
|
SerializeGPUModuleBase::SerializeGPUModuleBase(
|
|
Operation &module, XeVMTargetAttr xeTarget,
|
|
const gpu::TargetOptions &targetOptions)
|
|
: ModuleToObject(module, xeTarget.getTriple(), "", {}, xeTarget.getO()),
|
|
xeTarget(xeTarget), librariesToLink(targetOptions.getLibrariesToLink()),
|
|
targetOptions(targetOptions) {
|
|
if (xeTarget.getLinkFiles())
|
|
librariesToLink.append(xeTarget.getLinkFiles().begin(),
|
|
xeTarget.getLinkFiles().end());
|
|
}
|
|
|
|
XeVMTargetAttr SerializeGPUModuleBase::getTarget() const { return xeTarget; }
|
|
|
|
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
|
|
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
|
|
if (librariesToLink.empty())
|
|
return SmallVector<std::unique_ptr<llvm::Module>>();
|
|
SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
|
|
if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
|
|
bcFiles)))
|
|
return std::nullopt;
|
|
return std::move(bcFiles);
|
|
}
|
|
|
|
gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
|
|
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
|
|
}
|
|
|
|
// There is 1 way to finalize IL to native code: IGC
|
|
// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
|
|
// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
|
|
// - `ocloc` tool can be "queried" from within MLIR.
|
|
FailureOr<SmallVector<char, 0>>
|
|
SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
|
|
StringRef inputFormat) {
|
|
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
|
|
// Find the `ocloc` tool.
|
|
std::optional<std::string> oclocCompiler = findTool("ocloc");
|
|
if (!oclocCompiler)
|
|
return failure();
|
|
Location loc = getGPUModuleOp().getLoc();
|
|
std::string basename = llvm::formatv(
|
|
"mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
|
|
getTarget().getTriple(), getTarget().getChip());
|
|
|
|
auto createTemp = [&](StringRef name,
|
|
StringRef suffix) -> FailureOr<TmpFile> {
|
|
llvm::SmallString<128> filePath;
|
|
if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, filePath))
|
|
return getGPUModuleOp().emitError()
|
|
<< "Couldn't create the temp file: `" << filePath
|
|
<< "`, error message: " << ec.message();
|
|
|
|
return TmpFile(filePath, llvm::FileRemover(filePath.c_str()));
|
|
};
|
|
// Create temp file
|
|
FailureOr<TmpFile> asmFile = createTemp(basename, "asm");
|
|
FailureOr<TmpFile> binFile = createTemp(basename, "");
|
|
FailureOr<TmpFile> logFile = createTemp(basename, "log");
|
|
if (failed(logFile) || failed(asmFile) || failed(binFile))
|
|
return failure();
|
|
// Dump the assembly to a temp file
|
|
std::error_code ec;
|
|
{
|
|
llvm::raw_fd_ostream asmStream(asmFile->first, ec);
|
|
if (ec)
|
|
return emitError(loc) << "Couldn't open the file: `" << asmFile->first
|
|
<< "`, error message: " << ec.message();
|
|
|
|
asmStream << asmStr;
|
|
if (asmStream.has_error())
|
|
return emitError(loc)
|
|
<< "An error occurred while writing the assembly to: `"
|
|
<< asmFile->first << "`.";
|
|
|
|
asmStream.flush();
|
|
}
|
|
// Set cmd options
|
|
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
|
|
targetOptions.tokenizeCmdOptions();
|
|
// Example: --gpu-module-to-binary="opts='opt1 opt2'"
|
|
const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
|
|
SmallVector<StringRef, 12> oclocArgs(
|
|
{"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
|
|
getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
|
|
"-options", cmdOptsStr});
|
|
|
|
// Dump tool invocation commands.
|
|
#define DEBUG_TYPE "serialize-to-binary"
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "Tool invocation for module: "
|
|
<< getGPUModuleOp().getNameAttr() << "\n";
|
|
llvm::interleave(oclocArgs, llvm::dbgs(), " ");
|
|
llvm::dbgs() << "\n";
|
|
});
|
|
#undef DEBUG_TYPE
|
|
// Helper function for printing tool error logs.
|
|
std::string message;
|
|
auto emitLogError =
|
|
[&](StringRef toolName) -> FailureOr<SmallVector<char, 0>> {
|
|
if (message.empty()) {
|
|
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
|
|
llvm::MemoryBuffer::getFile(logFile->first);
|
|
if (toolStderr)
|
|
return emitError(loc) << toolName << " invocation failed. Log:\n"
|
|
<< toolStderr->get()->getBuffer();
|
|
else
|
|
return emitError(loc) << toolName << " invocation failed.";
|
|
}
|
|
return emitError(loc) << toolName
|
|
<< " invocation failed, error message: " << message;
|
|
};
|
|
std::optional<StringRef> redirects[] = {
|
|
std::nullopt,
|
|
logFile->first,
|
|
logFile->first,
|
|
};
|
|
// Invoke ocloc.
|
|
if (llvm::sys::ExecuteAndWait(oclocCompiler.value(), oclocArgs, std::nullopt,
|
|
redirects, 0, 0, &message))
|
|
return emitLogError("`ocloc`");
|
|
binFile->first.append(".bin");
|
|
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
|
|
llvm::MemoryBuffer::getFile(binFile->first);
|
|
if (!binaryBuffer)
|
|
return emitError(loc) << "Couldn't open the file: `" << binFile->first
|
|
<< "`, error message: "
|
|
<< binaryBuffer.getError().message();
|
|
|
|
StringRef bin = (*binaryBuffer)->getBuffer();
|
|
return SmallVector<char, 0>(bin.begin(), bin.end());
|
|
}
|
|
|
|
std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
|
|
// 1. Check the toolkit path given in the command line.
|
|
StringRef pathRef = targetOptions.getToolkitPath();
|
|
SmallVector<char, 256> path;
|
|
if (!pathRef.empty()) {
|
|
path.insert(path.begin(), pathRef.begin(), pathRef.end());
|
|
llvm::sys::path::append(path, "bin", tool);
|
|
if (llvm::sys::fs::can_execute(path))
|
|
return StringRef(path.data(), path.size()).str();
|
|
}
|
|
// 2. Check PATH.
|
|
if (std::optional<std::string> toolPath =
|
|
llvm::sys::Process::FindInEnvPath("PATH", tool))
|
|
return *toolPath;
|
|
|
|
getGPUModuleOp().emitError()
|
|
<< "Couldn't find the `" << tool
|
|
<< "` binary. Please specify the toolkit "
|
|
"path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
|
|
return std::nullopt;
|
|
}
|
|
|
|
namespace {
|
|
class SPIRVSerializer : public SerializeGPUModuleBase {
|
|
public:
|
|
SPIRVSerializer(Operation &module, XeVMTargetAttr xeTarget,
|
|
const gpu::TargetOptions &targetOptions)
|
|
: SerializeGPUModuleBase(module, xeTarget, targetOptions) {}
|
|
|
|
static void init();
|
|
|
|
/// Serializes the LLVM module to an object format, depending on the
|
|
/// compilation target selected in target options.
|
|
FailureOr<SmallVector<char, 0>>
|
|
moduleToObject(llvm::Module &llvmModule) override;
|
|
|
|
private:
|
|
/// Translates the LLVM module to SPIR-V binary using LLVM's
|
|
/// SPIR-V target.
|
|
std::optional<std::string>
|
|
translateToSPIRVBinary(llvm::Module &llvmModule,
|
|
llvm::TargetMachine &targetMachine);
|
|
};
|
|
} // namespace
|
|
|
|
void SPIRVSerializer::init() {
|
|
static llvm::once_flag initializeBackendOnce;
|
|
llvm::call_once(initializeBackendOnce, []() {
|
|
#if LLVM_HAS_SPIRV_TARGET
|
|
LLVMInitializeSPIRVTarget();
|
|
LLVMInitializeSPIRVTargetInfo();
|
|
LLVMInitializeSPIRVTargetMC();
|
|
LLVMInitializeSPIRVAsmPrinter();
|
|
#endif
|
|
});
|
|
}
|
|
|
|
FailureOr<SmallVector<char, 0>>
|
|
SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
|
|
#define DEBUG_TYPE "serialize-to-llvm"
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "LLVM IR for module: " << getGPUModuleOp().getNameAttr()
|
|
<< "\n";
|
|
llvm::dbgs() << llvmModule << "\n";
|
|
llvm::dbgs().flush();
|
|
});
|
|
#undef DEBUG_TYPE
|
|
|
|
// Return LLVM IR if the compilation target is `offload`.
|
|
if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
|
|
return SerializeGPUModuleBase::moduleToObject(llvmModule);
|
|
|
|
#if !LLVM_HAS_SPIRV_TARGET
|
|
return getGPUModuleOp()->emitError(
|
|
"The `SPIRV` target was not built. Please enable "
|
|
"it when building LLVM.");
|
|
#endif // LLVM_HAS_SPIRV_TARGET
|
|
|
|
FailureOr<llvm::TargetMachine *> targetMachine = getOrCreateTargetMachine();
|
|
if (failed(targetMachine))
|
|
return getGPUModuleOp().emitError()
|
|
<< "Target Machine unavailable for triple " << triple
|
|
<< ", can't optimize with LLVM\n";
|
|
|
|
// Return SPIRV if the compilation target is `assembly`.
|
|
if (targetOptions.getCompilationTarget() ==
|
|
gpu::CompilationTarget::Assembly) {
|
|
FailureOr<SmallString<0>> serializedISA =
|
|
translateModuleToISA(llvmModule, **targetMachine,
|
|
[&]() { return getGPUModuleOp().emitError(); });
|
|
if (failed(serializedISA))
|
|
return getGPUModuleOp().emitError()
|
|
<< "Failed translating the module to ISA." << triple
|
|
<< ", can't compile with LLVM\n";
|
|
|
|
#define DEBUG_TYPE "serialize-to-isa"
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "SPIR-V for module: " << getGPUModuleOp().getNameAttr()
|
|
<< "\n";
|
|
llvm::dbgs() << *serializedISA << "\n";
|
|
llvm::dbgs().flush();
|
|
});
|
|
#undef DEBUG_TYPE
|
|
|
|
// Make sure to include the null terminator.
|
|
StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
|
|
return SmallVector<char, 0>(bin.begin(), bin.end());
|
|
}
|
|
|
|
// Level zero runtime is set up to accept SPIR-V binary
|
|
// translateToSPIRVBinary translates the LLVM module to SPIR-V binary
|
|
// using LLVM's SPIRV target.
|
|
// compileToBinary can be used in the future if level zero runtime
|
|
// implementation switches to native XeVM binary format.
|
|
std::optional<std::string> serializedSPIRVBinary =
|
|
translateToSPIRVBinary(llvmModule, **targetMachine);
|
|
if (!serializedSPIRVBinary)
|
|
return getGPUModuleOp().emitError()
|
|
<< "Failed translating the module to Binary.";
|
|
|
|
if (serializedSPIRVBinary->size() % 4)
|
|
return getGPUModuleOp().emitError()
|
|
<< "SPIRV code size must be a multiple of 4.";
|
|
|
|
StringRef bin(serializedSPIRVBinary->c_str(), serializedSPIRVBinary->size());
|
|
return SmallVector<char, 0>(bin.begin(), bin.end());
|
|
}
|
|
|
|
std::optional<std::string>
|
|
SPIRVSerializer::translateToSPIRVBinary(llvm::Module &llvmModule,
|
|
llvm::TargetMachine &targetMachine) {
|
|
std::string targetISA;
|
|
llvm::raw_string_ostream stream(targetISA);
|
|
|
|
{ // Drop pstream after this to prevent the ISA from being stuck buffering
|
|
llvm::buffer_ostream pstream(stream);
|
|
llvm::legacy::PassManager codegenPasses;
|
|
if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
|
|
llvm::CodeGenFileType::ObjectFile))
|
|
return std::nullopt;
|
|
|
|
codegenPasses.run(llvmModule);
|
|
}
|
|
return targetISA;
|
|
}
|
|
|
|
std::optional<mlir::gpu::SerializedObject>
|
|
XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
|
const gpu::TargetOptions &options) const {
|
|
if (!module)
|
|
return std::nullopt;
|
|
auto gpuMod = dyn_cast<gpu::GPUModuleOp>(module);
|
|
if (!gpuMod) {
|
|
module->emitError("expected to be a gpu.module op");
|
|
return std::nullopt;
|
|
}
|
|
auto xeTarget = cast<XeVMTargetAttr>(attribute);
|
|
if (xeTarget.getTriple().starts_with("spirv")) {
|
|
gpuMod.walk([&](LLVM::LLVMFuncOp funcOp) {
|
|
if (funcOp->hasAttr(gpu::GPUDialect::getKernelFuncAttrName())) {
|
|
funcOp.setIntelReqdSubGroupSize(16);
|
|
return WalkResult::interrupt();
|
|
}
|
|
return WalkResult::advance();
|
|
});
|
|
|
|
SPIRVSerializer serializer(*module, cast<XeVMTargetAttr>(attribute),
|
|
options);
|
|
serializer.init();
|
|
|
|
#if !LLVM_HAS_SPIRV_TARGET
|
|
module->emitError("Cannot run `TargetRegistry::lookupTarget()` for SPIRV "
|
|
"without having the target built.");
|
|
#endif
|
|
|
|
std::optional<SmallVector<char, 0>> binary = serializer.run();
|
|
if (!binary)
|
|
return std::nullopt;
|
|
return gpu::SerializedObject{std::move(*binary)};
|
|
}
|
|
module->emitError("Unsupported XeVM target triple: ") << xeTarget.getTriple();
|
|
return std::nullopt;
|
|
}
|
|
|
|
Attribute
|
|
XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
|
const mlir::gpu::SerializedObject &object,
|
|
const gpu::TargetOptions &options) const {
|
|
Builder builder(attribute.getContext());
|
|
gpu::CompilationTarget format = options.getCompilationTarget();
|
|
auto xeTarget = cast<XeVMTargetAttr>(attribute);
|
|
SmallVector<NamedAttribute, 2> properties;
|
|
if (format == gpu::CompilationTarget::Assembly)
|
|
properties.push_back(
|
|
builder.getNamedAttr("O", builder.getI32IntegerAttr(xeTarget.getO())));
|
|
|
|
DictionaryAttr objectProps;
|
|
if (!properties.empty())
|
|
objectProps = builder.getDictionaryAttr(properties);
|
|
|
|
return builder.getAttr<gpu::ObjectAttr>(
|
|
attribute, format,
|
|
builder.getStringAttr(
|
|
StringRef(object.getObject().data(), object.getObject().size())),
|
|
objectProps, /*kernels=*/nullptr);
|
|
}
|