[MLIR][GPU] Update serializeToObject to use SerializedObject wrapper and include ISA compiler logs (#176697)
This PR makes the compilation log from the ISA compiler available to users by returning it as part of the `gpu::ObjectAttr` properties, following the existing pattern used for `LLVMIRToISATimeInMs`. Currently, the compiler log (which contains useful information such as spill statistics when `--verbose` is passed) is only accessible in debug builds via `LLVM_DEBUG`. However, there are good reasons to make this information available in release builds as well: 1. Both `ptxas` and `libnvptxcompiler` are publicly available tools/libraries distributed with the CUDA Toolkit. The `--verbose` flag and its output are documented public features, not internal debug information. 2. The verbose output provides valuable insights for users. A new `SerializedObject` class is used to carry the metadata alongside the binary when returning from `serializeToObject`.
This commit is contained in:
parent
1ce7a81593
commit
fbffdaa174
@ -37,7 +37,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
|
||||
is meant to be used for passing additional options that are not in the
|
||||
attribute.
|
||||
}],
|
||||
"std::optional<::mlir::SmallVector<char, 0>>", "serializeToObject",
|
||||
"std::optional<SerializedObject>", "serializeToObject",
|
||||
(ins "::mlir::Operation*":$module,
|
||||
"const ::mlir::gpu::TargetOptions&":$options)>,
|
||||
InterfaceMethod<[{
|
||||
@ -50,7 +50,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
|
||||
attribute.
|
||||
}], "::mlir::Attribute", "createObject",
|
||||
(ins "::mlir::Operation *":$module,
|
||||
"const ::llvm::SmallVector<char, 0> &":$object,
|
||||
"const ::mlir::gpu::SerializedObject &":$object,
|
||||
"const ::mlir::gpu::TargetOptions &":$options)>
|
||||
];
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#define MLIR_DIALECT_GPU_IR_COMPILATIONINTERFACES_H
|
||||
|
||||
#include "mlir/IR/Attributes.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
|
||||
namespace llvm {
|
||||
@ -170,6 +171,24 @@ protected:
|
||||
private:
|
||||
TypeID typeID;
|
||||
};
|
||||
|
||||
/// This class represents a serialized object (GPU binary) with metadata (e.g.
|
||||
/// timings, logs, ...).
|
||||
class SerializedObject {
|
||||
public:
|
||||
/// Builds a serialized object from the raw bytes in `object` and an optional
/// `metadata` dictionary. `object` is taken by value and moved into the
/// wrapper; `metadata` is stored as given.
/// NOTE(review): `metadata` defaults to `{}`, presumably a null
/// `DictionaryAttr` -- confirm that consumers check it before reading its
/// entries.
SerializedObject(::mlir::SmallVector<char, 0> object,
|
||||
DictionaryAttr metadata = {})
|
||||
: object(std::move(object)), metadata(metadata) {}
|
||||
|
||||
/// Returns a read-only reference to the stored object bytes.
const SmallVector<char, 0> &getObject() const { return object; }
|
||||
|
||||
/// Returns the metadata dictionary exactly as it was passed to the
/// constructor.
DictionaryAttr getMetadata() const { return metadata; }
|
||||
|
||||
private:
|
||||
/// Serialized bytes held by this wrapper.
SmallVector<char, 0> object;
|
||||
/// Metadata attached to the serialized bytes.
DictionaryAttr metadata;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace mlir
|
||||
|
||||
|
||||
@ -157,6 +157,9 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
|
||||
Option<"ftzFlag", "ftz", "bool",
|
||||
/*default=*/"false",
|
||||
"Enable flush to zero for denormals.">,
|
||||
Option<"compilerDiagnosticsFlag", "collect-compiler-diagnostics", "bool",
|
||||
/*default=*/"false",
|
||||
"Enable collection of compiler diagnostics.">,
|
||||
ListOption<"linkLibs", "l", "std::string",
|
||||
"Extra bitcode libraries paths to link to.">,
|
||||
Option<"cmdOptions", "ptxas-cmd-options", "std::string",
|
||||
|
||||
@ -87,7 +87,7 @@ LogicalResult moduleSerializer(GPUModuleOp op,
|
||||
auto target = dyn_cast<gpu::TargetAttrInterface>(targetAttr);
|
||||
assert(target &&
|
||||
"Target attribute doesn't implements `TargetAttrInterface`.");
|
||||
std::optional<SmallVector<char, 0>> serializedModule =
|
||||
std::optional<SerializedObject> serializedModule =
|
||||
target.serializeToObject(op, targetOptions);
|
||||
if (!serializedModule) {
|
||||
op.emitError("An error happened while serializing the module.");
|
||||
|
||||
@ -53,6 +53,8 @@ DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
|
||||
addFlag("fast");
|
||||
if (ftzFlag)
|
||||
addFlag("ftz");
|
||||
if (compilerDiagnosticsFlag)
|
||||
addFlag("collect-compiler-diagnostics");
|
||||
|
||||
// Tokenize and set the optional command line options.
|
||||
if (!cmdOptions.empty()) {
|
||||
|
||||
@ -59,12 +59,12 @@ namespace {
|
||||
class NVVMTargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
|
||||
Attribute createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
@ -232,6 +232,9 @@ public:
|
||||
/// is LLVMIR or ISA.
|
||||
std::optional<int64_t> getISAToBinaryTimeInMs();
|
||||
|
||||
/// Get the compiler log from ISA compiler.
|
||||
StringRef getISACompilerLog() const;
|
||||
|
||||
private:
|
||||
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
|
||||
|
||||
@ -253,6 +256,9 @@ private:
|
||||
|
||||
/// ISA->Binary perf result.
|
||||
std::optional<int64_t> isaToBinaryTimeInMs;
|
||||
|
||||
/// Compiler log from ptxas or libnvptxcompiler.
|
||||
std::string isaCompilerLog;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@ -285,6 +291,8 @@ std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
|
||||
return isaToBinaryTimeInMs;
|
||||
}
|
||||
|
||||
StringRef NVPTXSerializer::getISACompilerLog() const { return isaCompilerLog; }
|
||||
|
||||
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
|
||||
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
|
||||
}
|
||||
@ -484,6 +492,11 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
|
||||
/*MemoryLimit=*/0,
|
||||
/*ErrMsg=*/&message))
|
||||
return emitLogError("`ptxas`");
|
||||
|
||||
if (target.hasFlag("collect-compiler-diagnostics")) {
|
||||
if (auto logBuffer = llvm::MemoryBuffer::getFile(logFile->first))
|
||||
isaCompilerLog = (*logBuffer)->getBuffer().str();
|
||||
}
|
||||
#define DEBUG_TYPE "dump-sass"
|
||||
LLVM_DEBUG({
|
||||
std::optional<std::string> nvdisasm = findTool("nvdisasm");
|
||||
@ -547,7 +560,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
|
||||
if (auto status = (expr)) { \
|
||||
emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
|
||||
<< status; \
|
||||
return failure(); \
|
||||
return failure(); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
@ -559,7 +572,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
|
||||
if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
|
||||
emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
|
||||
<< nvFatbinGetErrorString(result); \
|
||||
return failure(); \
|
||||
return failure(); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
@ -611,21 +624,32 @@ NVPTXSerializer::compileToBinaryNVPTX(StringRef ptxCode) {
|
||||
RETURN_ON_NVPTXCOMPILER_ERROR(
|
||||
nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
|
||||
|
||||
// Lambda to fetch info log; returns empty vector on failure or no log.
|
||||
auto fetchInfoLog = [&]() -> SmallVector<char> {
|
||||
size_t size = 0;
|
||||
if (nvPTXCompilerGetInfoLogSize(compiler, &size) != NVPTXCOMPILE_SUCCESS ||
|
||||
size == 0)
|
||||
return {};
|
||||
SmallVector<char> log(size + 1, 0);
|
||||
if (nvPTXCompilerGetInfoLog(compiler, log.data()) != NVPTXCOMPILE_SUCCESS)
|
||||
return {};
|
||||
return log;
|
||||
};
|
||||
|
||||
if (target.hasFlag("collect-compiler-diagnostics")) {
|
||||
if (auto log = fetchInfoLog(); !log.empty())
|
||||
isaCompilerLog = log.data();
|
||||
}
|
||||
|
||||
// Dump the log of the compiler, helpful if the verbose flag was passed.
|
||||
#define DEBUG_TYPE "serialize-to-binary"
|
||||
LLVM_DEBUG({
|
||||
RETURN_ON_NVPTXCOMPILER_ERROR(
|
||||
nvPTXCompilerGetInfoLogSize(compiler, &logSize));
|
||||
if (logSize != 0) {
|
||||
SmallVector<char> log(logSize + 1, 0);
|
||||
RETURN_ON_NVPTXCOMPILER_ERROR(
|
||||
nvPTXCompilerGetInfoLog(compiler, log.data()));
|
||||
if (auto log = fetchInfoLog(); !log.empty())
|
||||
LDBG() << "NVPTX compiler invocation for module: "
|
||||
<< getOperation().getNameAttr()
|
||||
<< "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")
|
||||
<< "\nOutput\n"
|
||||
<< log.data();
|
||||
}
|
||||
});
|
||||
#undef DEBUG_TYPE
|
||||
RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
|
||||
@ -725,7 +749,7 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
Builder builder(attribute.getContext());
|
||||
@ -739,26 +763,38 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
|
||||
serializer.init();
|
||||
std::optional<SmallVector<char, 0>> result = serializer.run();
|
||||
if (!result)
|
||||
return std::nullopt;
|
||||
|
||||
SmallVector<NamedAttribute, 4> properties;
|
||||
auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
|
||||
if (llvmToISATimeInMs.has_value())
|
||||
module->setAttr("LLVMIRToISATimeInMs",
|
||||
builder.getI64IntegerAttr(*llvmToISATimeInMs));
|
||||
properties.push_back(builder.getNamedAttr(
|
||||
"LLVMIRToISATimeInMs", builder.getI64IntegerAttr(*llvmToISATimeInMs)));
|
||||
auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
|
||||
if (isaToBinaryTimeInMs.has_value())
|
||||
module->setAttr("ISAToBinaryTimeInMs",
|
||||
builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
|
||||
return result;
|
||||
properties.push_back(
|
||||
builder.getNamedAttr("ISAToBinaryTimeInMs",
|
||||
builder.getI64IntegerAttr(*isaToBinaryTimeInMs)));
|
||||
StringRef isaCompilerLog = serializer.getISACompilerLog();
|
||||
if (!isaCompilerLog.empty())
|
||||
properties.push_back(builder.getNamedAttr(
|
||||
"ISACompilerLog", builder.getStringAttr(isaCompilerLog)));
|
||||
|
||||
return gpu::SerializedObject{std::move(*result),
|
||||
builder.getDictionaryAttr(properties)};
|
||||
}
|
||||
|
||||
Attribute
|
||||
NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const {
|
||||
auto target = cast<NVVMTargetAttr>(attribute);
|
||||
gpu::CompilationTarget format = options.getCompilationTarget();
|
||||
DictionaryAttr objectProps;
|
||||
Builder builder(attribute.getContext());
|
||||
SmallVector<NamedAttribute, 4> properties;
|
||||
SmallVector<NamedAttribute> properties =
|
||||
llvm::to_vector(object.getMetadata().getValue());
|
||||
if (format == gpu::CompilationTarget::Assembly)
|
||||
properties.push_back(
|
||||
builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
|
||||
@ -767,19 +803,12 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
|
||||
builder.getStringAttr(section)));
|
||||
|
||||
for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
|
||||
if (module->hasAttr(perfName)) {
|
||||
IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
|
||||
properties.push_back(builder.getNamedAttr(
|
||||
perfName, builder.getI64IntegerAttr(attr.getInt())));
|
||||
}
|
||||
}
|
||||
|
||||
if (!properties.empty())
|
||||
objectProps = builder.getDictionaryAttr(properties);
|
||||
|
||||
return builder.getAttr<gpu::ObjectAttr>(
|
||||
attribute, format,
|
||||
builder.getStringAttr(StringRef(object.data(), object.size())),
|
||||
builder.getStringAttr(
|
||||
StringRef(object.getObject().data(), object.getObject().size())),
|
||||
objectProps, /*kernels=*/nullptr);
|
||||
}
|
||||
|
||||
@ -55,12 +55,12 @@ namespace {
|
||||
class ROCDLTargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
|
||||
Attribute createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
@ -473,7 +473,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
|
||||
}
|
||||
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
|
||||
|
||||
std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
ROCDLTargetAttrImpl::serializeToObject(
|
||||
Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
assert(module && "The module must be non null.");
|
||||
@ -487,7 +488,10 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
|
||||
AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
|
||||
options);
|
||||
serializer.init();
|
||||
return serializer.run();
|
||||
std::optional<SmallVector<char, 0>> binary = serializer.run();
|
||||
if (!binary)
|
||||
return std::nullopt;
|
||||
return gpu::SerializedObject{std::move(*binary)};
|
||||
#else
|
||||
module->emitError("the `AMDGPU` target was not built. Please enable it when "
|
||||
"building LLVM");
|
||||
@ -497,7 +501,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
|
||||
|
||||
Attribute
|
||||
ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const {
|
||||
gpu::CompilationTarget format = options.getCompilationTarget();
|
||||
// If format is `fatbin` transform it to binary as `fatbin` is not yet
|
||||
@ -505,12 +509,12 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
gpu::KernelTableAttr kernels;
|
||||
if (format > gpu::CompilationTarget::Binary) {
|
||||
format = gpu::CompilationTarget::Binary;
|
||||
kernels = ROCDL::getKernelMetadata(module, object);
|
||||
kernels = ROCDL::getKernelMetadata(module, object.getObject());
|
||||
}
|
||||
DictionaryAttr properties{};
|
||||
Builder builder(attribute.getContext());
|
||||
StringAttr objectStr =
|
||||
builder.getStringAttr(StringRef(object.data(), object.size()));
|
||||
StringAttr objectStr = builder.getStringAttr(
|
||||
StringRef(object.getObject().data(), object.getObject().size()));
|
||||
return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
|
||||
properties, kernels);
|
||||
}
|
||||
|
||||
@ -51,12 +51,12 @@ namespace {
|
||||
class XeVMTargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<XeVMTargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
|
||||
Attribute createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
@ -354,7 +354,7 @@ SPIRVSerializer::translateToSPIRVBinary(llvm::Module &llvmModule,
|
||||
return targetISA;
|
||||
}
|
||||
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
if (!module)
|
||||
@ -383,7 +383,10 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
"without having the target built.");
|
||||
#endif
|
||||
|
||||
return serializer.run();
|
||||
std::optional<SmallVector<char, 0>> binary = serializer.run();
|
||||
if (!binary)
|
||||
return std::nullopt;
|
||||
return gpu::SerializedObject{std::move(*binary)};
|
||||
}
|
||||
module->emitError("Unsupported XeVM target triple: ") << xeTarget.getTriple();
|
||||
return std::nullopt;
|
||||
@ -391,7 +394,7 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
|
||||
Attribute
|
||||
XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const {
|
||||
Builder builder(attribute.getContext());
|
||||
gpu::CompilationTarget format = options.getCompilationTarget();
|
||||
@ -407,6 +410,7 @@ XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
|
||||
return builder.getAttr<gpu::ObjectAttr>(
|
||||
attribute, format,
|
||||
builder.getStringAttr(StringRef(object.data(), object.size())),
|
||||
builder.getStringAttr(
|
||||
StringRef(object.getObject().data(), object.getObject().size())),
|
||||
objectProps, /*kernels=*/nullptr);
|
||||
}
|
||||
|
||||
@ -30,12 +30,12 @@ namespace {
|
||||
class SPIRVTargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<SPIRVTargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
|
||||
Attribute createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
@ -56,7 +56,8 @@ void mlir::spirv::registerSPIRVTargetInterfaceExternalModels(
|
||||
}
|
||||
|
||||
// Reuse from existing serializer
|
||||
std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
SPIRVTargetAttrImpl::serializeToObject(
|
||||
Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
if (!module)
|
||||
@ -84,19 +85,20 @@ std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
|
||||
std::memcpy(spvData.data(), spvBinary.data(), spvData.size());
|
||||
|
||||
spvMod.erase();
|
||||
return spvData;
|
||||
return gpu::SerializedObject{std::move(spvData)};
|
||||
}
|
||||
|
||||
// Prepare Attribute for gpu.binary with serialized kernel object
|
||||
Attribute
|
||||
SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const {
|
||||
gpu::CompilationTarget format = options.getCompilationTarget();
|
||||
DictionaryAttr objectProps;
|
||||
Builder builder(attribute.getContext());
|
||||
return builder.getAttr<gpu::ObjectAttr>(
|
||||
attribute, format,
|
||||
builder.getStringAttr(StringRef(object.data(), object.size())),
|
||||
builder.getStringAttr(
|
||||
StringRef(object.getObject().data(), object.getObject().size())),
|
||||
objectProps, /*kernels=*/nullptr);
|
||||
}
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
// RUN: mlir-opt %s --nvvm-attach-target="" | FileCheck %s
|
||||
// RUN: mlir-opt %s --nvvm-attach-target="ptxas-cmd-options=--register-usage-level=8" | FileCheck %s -check-prefix=CHECK-OPTIONS
|
||||
// RUN: mlir-opt %s --nvvm-attach-target="verify-target-arch=false" | FileCheck %s -check-prefix=CHECK-DISABLE-VERIFYTARGET
|
||||
// RUN: mlir-opt %s --nvvm-attach-target="collect-compiler-diagnostics=true" | FileCheck %s -check-prefix=CHECK-DIAG
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
// CHECK-LABEL:gpu.module @kernel_module1
|
||||
// CHECK: [#nvvm.target]
|
||||
// CHECK-OPTIONS: [#nvvm.target<flags = {"ptxas-cmd-options" = ["--register-usage-level=8"]}>]
|
||||
// CHECK-DISABLE-VERIFYTARGET: [#nvvm.target<verifyTarget = false>]
|
||||
// CHECK-DIAG: [#nvvm.target<flags = {"collect-compiler-diagnostics"}>]
|
||||
gpu.module @kernel_module1 {
|
||||
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
|
||||
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
|
||||
|
||||
@ -0,0 +1,14 @@
|
||||
// RUN: mlir-opt %s --gpu-module-to-binary="format=%gpu_compilation_format opts=--verbose" \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
// CHECK-LABEL: gpu.binary @kernel_module
|
||||
// CHECK: properties = {{{.*}}ISACompilerLog = {{.*}}
|
||||
gpu.module @kernel_module [#nvvm.target<chip = "sm_70", flags = {"collect-compiler-diagnostics"}>] {
|
||||
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
|
||||
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
|
||||
%arg5: i64) attributes {gpu.kernel} {
|
||||
llvm.return
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -86,15 +86,16 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
|
||||
// Read the serialized module.
|
||||
llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
|
||||
"module");
|
||||
llvm::MemoryBufferRef buffer(
|
||||
StringRef(object->getObject().data(), object->getObject().size()),
|
||||
"module");
|
||||
llvm::LLVMContext llvmContext;
|
||||
llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
|
||||
llvm::getLazyBitcodeModule(buffer, llvmContext);
|
||||
@ -122,15 +123,18 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
|
||||
ASSERT_TRUE(
|
||||
StringRef(object->data(), object->size()).contains("nvvm_kernel"));
|
||||
ASSERT_TRUE(StringRef(object->data(), object->size()).count('\0') == 0);
|
||||
StringRef(object->getObject().data(), object->getObject().size())
|
||||
.contains("nvvm_kernel"));
|
||||
ASSERT_TRUE(
|
||||
StringRef(object->getObject().data(), object->getObject().size())
|
||||
.count('\0') == 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,11 +157,11 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
}
|
||||
}
|
||||
|
||||
@ -203,11 +207,11 @@ TEST_F(MLIRTargetLLVMNVVM,
|
||||
optimizedCallback, isaCallback);
|
||||
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
ASSERT_TRUE(!initialLLVMIR.empty());
|
||||
ASSERT_TRUE(!linkedLLVMIR.empty());
|
||||
ASSERT_TRUE(!optimizedLLVMIR.empty());
|
||||
@ -275,7 +279,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
|
||||
gpu::CompilationTarget::Assembly, {}, {},
|
||||
linkedCallback);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
|
||||
// Verify that we correctly linked in the library: the external call is
|
||||
@ -294,6 +298,6 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
|
||||
ASSERT_FALSE(bar->empty());
|
||||
}
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,15 +85,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToLLVM)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
|
||||
// Read the serialized module.
|
||||
llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
|
||||
"module");
|
||||
llvm::MemoryBufferRef buffer(
|
||||
StringRef(object->getObject().data(), object->getObject().size()),
|
||||
"module");
|
||||
llvm::LLVMContext llvmContext;
|
||||
llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
|
||||
llvm::getLazyBitcodeModule(buffer, llvmContext);
|
||||
@ -121,11 +122,12 @@ TEST_F(MLIRTargetLLVMROCDL,
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
EXPECT_TRUE(StringRef(object->data(), object->size())
|
||||
.contains(".amdhsa_code_object_version 6"));
|
||||
EXPECT_TRUE(
|
||||
StringRef(object->getObject().data(), object->getObject().size())
|
||||
.contains(".amdhsa_code_object_version 6"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -147,11 +149,12 @@ TEST_F(MLIRTargetLLVMROCDL,
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
EXPECT_TRUE(StringRef(object->data(), object->size())
|
||||
.contains(".amdhsa_code_object_version 4"));
|
||||
EXPECT_TRUE(
|
||||
StringRef(object->getObject().data(), object->getObject().size())
|
||||
.contains(".amdhsa_code_object_version 4"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,14 +174,15 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(!object->empty());
|
||||
ASSERT_TRUE(!object->getObject().empty());
|
||||
|
||||
ASSERT_TRUE(
|
||||
StringRef(object->data(), object->size()).contains("rocdl_kernel"));
|
||||
StringRef(object->getObject().data(), object->getObject().size())
|
||||
.contains("rocdl_kernel"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,11 +205,11 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_FALSE(object->empty());
|
||||
ASSERT_FALSE(object->getObject().empty());
|
||||
}
|
||||
}
|
||||
|
||||
@ -245,16 +249,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
std::optional<mlir::gpu::SerializedObject> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_FALSE(object->empty());
|
||||
ASSERT_FALSE(object->getObject().empty());
|
||||
if (!object)
|
||||
continue;
|
||||
// Get the metadata.
|
||||
gpu::KernelTableAttr metadata =
|
||||
ROCDL::getKernelMetadata(gpuModule, *object);
|
||||
ROCDL::getKernelMetadata(gpuModule, object->getObject());
|
||||
ASSERT_TRUE(metadata != nullptr);
|
||||
// There should be 4 kernels.
|
||||
ASSERT_TRUE(metadata.size() == 4);
|
||||
|
||||
@ -37,12 +37,12 @@ namespace {
|
||||
class TargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<TargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
|
||||
Attribute createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
@ -82,13 +82,15 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
|
||||
std::string targetTriple = llvm::sys::getProcessTriple();
|
||||
LLVM::ModuleToObject serializer(*(module->getOperation()), targetTriple, "",
|
||||
"");
|
||||
std::optional<SmallVector<char, 0>> serializedModule = serializer.run();
|
||||
std::optional<mlir::gpu::SerializedObject> serializedModule =
|
||||
serializer.run();
|
||||
ASSERT_TRUE(!!serializedModule);
|
||||
ASSERT_TRUE(!serializedModule->empty());
|
||||
ASSERT_TRUE(!serializedModule->getObject().empty());
|
||||
|
||||
// Read the serialized module.
|
||||
llvm::MemoryBufferRef buffer(
|
||||
StringRef(serializedModule->data(), serializedModule->size()), "module");
|
||||
llvm::MemoryBufferRef buffer(StringRef(serializedModule->getObject().data(),
|
||||
serializedModule->getObject().size()),
|
||||
"module");
|
||||
llvm::LLVMContext llvmContext;
|
||||
llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
|
||||
llvm::getLazyBitcodeModule(buffer, llvmContext);
|
||||
@ -99,7 +101,7 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
|
||||
ASSERT_TRUE((*llvmModule)->getFunction("foo") != nullptr);
|
||||
}
|
||||
|
||||
std::optional<SmallVector<char, 0>>
|
||||
std::optional<mlir::gpu::SerializedObject>
|
||||
TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
// Set a dummy attr to be retrieved by `createObject`.
|
||||
@ -113,14 +115,15 @@ TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
|
||||
Attribute
|
||||
TargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
||||
const SmallVector<char, 0> &object,
|
||||
const mlir::gpu::SerializedObject &object,
|
||||
const gpu::TargetOptions &options) const {
|
||||
// Create a GPU object with the GPU module dictionary as the object
|
||||
// properties.
|
||||
return gpu::ObjectAttr::get(
|
||||
module->getContext(), attribute, gpu::CompilationTarget::Offload,
|
||||
StringAttr::get(module->getContext(),
|
||||
StringRef(object.data(), object.size())),
|
||||
StringAttr::get(
|
||||
module->getContext(),
|
||||
StringRef(object.getObject().data(), object.getObject().size())),
|
||||
module->getAttrDictionary(), /*kernels=*/nullptr);
|
||||
}
|
||||
|
||||
@ -140,11 +143,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {
|
||||
// Check the attribute holds the interface.
|
||||
ASSERT_TRUE(!!targetAttr);
|
||||
gpu::TargetOptions opts;
|
||||
std::optional<SmallVector<char, 0>> serializedBinary =
|
||||
std::optional<mlir::gpu::SerializedObject> serializedBinary =
|
||||
targetAttr.serializeToObject(*module, opts);
|
||||
// Check the serialized string.
|
||||
ASSERT_TRUE(!!serializedBinary);
|
||||
ASSERT_TRUE(!serializedBinary->empty());
|
||||
ASSERT_TRUE(!serializedBinary->getObject().empty());
|
||||
// Create the object attribute.
|
||||
auto object = cast<gpu::ObjectAttr>(
|
||||
targetAttr.createObject(*module, *serializedBinary, opts));
|
||||
@ -176,11 +179,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithInitialLLVMIR)) {
|
||||
gpu::TargetOptions opts(
|
||||
{}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
|
||||
{}, initialCallback);
|
||||
std::optional<SmallVector<char, 0>> serializedBinary =
|
||||
std::optional<mlir::gpu::SerializedObject> serializedBinary =
|
||||
targetAttr.serializeToObject(*module, opts);
|
||||
|
||||
ASSERT_TRUE(serializedBinary != std::nullopt);
|
||||
ASSERT_TRUE(!serializedBinary->empty());
|
||||
ASSERT_TRUE(!serializedBinary->getObject().empty());
|
||||
ASSERT_TRUE(!initialLLVMIR.empty());
|
||||
}
|
||||
|
||||
@ -204,11 +207,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithLinkedLLVMIR)) {
|
||||
gpu::TargetOptions opts(
|
||||
{}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
|
||||
{}, {}, linkedCallback);
|
||||
std::optional<SmallVector<char, 0>> serializedBinary =
|
||||
std::optional<mlir::gpu::SerializedObject> serializedBinary =
|
||||
targetAttr.serializeToObject(*module, opts);
|
||||
|
||||
ASSERT_TRUE(serializedBinary != std::nullopt);
|
||||
ASSERT_TRUE(!serializedBinary->empty());
|
||||
ASSERT_TRUE(!serializedBinary->getObject().empty());
|
||||
ASSERT_TRUE(!linkedLLVMIR.empty());
|
||||
}
|
||||
|
||||
@ -233,10 +236,10 @@ TEST_F(MLIRTargetLLVM,
|
||||
gpu::TargetOptions opts(
|
||||
{}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
|
||||
{}, {}, {}, optimizedCallback);
|
||||
std::optional<SmallVector<char, 0>> serializedBinary =
|
||||
std::optional<mlir::gpu::SerializedObject> serializedBinary =
|
||||
targetAttr.serializeToObject(*module, opts);
|
||||
|
||||
ASSERT_TRUE(serializedBinary != std::nullopt);
|
||||
ASSERT_TRUE(!serializedBinary->empty());
|
||||
ASSERT_TRUE(!serializedBinary->getObject().empty());
|
||||
ASSERT_TRUE(!optimizedLLVMIR.empty());
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user