[MLIR][GPU] Update serializeToObject to use SerializedObject wrapper and include ISA compiler logs (#176697)

This PR makes the compilation log from the ISA compiler available to users by returning it as part of the `gpu::ObjectAttr` properties, following the existing pattern used for `LLVMIRToISATimeInMs`. Currently, the compiler log (which contains useful information such as spill statistics when `--verbose` is passed) is only accessible in debug builds via `LLVM_DEBUG`. However, there are good reasons to make this information available in release builds as well: (1) Both `ptxas` and `libnvptxcompiler` are publicly available tools/libraries distributed with the CUDA Toolkit, and the `--verbose` flag and its output are documented public features, not internal debug information. (2) The verbose output provides valuable insights for users. Log collection is opt-in: the log is captured only when the `collect-compiler-diagnostics` flag is set on the NVVM target. A new `SerializedObject` class is used to carry the metadata alongside the binary when returning from `serializeToObject`.
2026-01-30 19:56:20 +08:00 · 2026-01-30 19:56:20 +08:00 · fbffdaa174
commit fbffdaa174
parent 1ce7a81593
14 changed files with 191 additions and 101 deletions
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td
@ -37,7 +37,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
        is meant to be used for passing additional options that are not in the
        attribute.
      }],
-      "std::optional<::mlir::SmallVector<char, 0>>", "serializeToObject",
+      "std::optional<SerializedObject>", "serializeToObject",
      (ins "::mlir::Operation*":$module,
           "const ::mlir::gpu::TargetOptions&":$options)>,
    InterfaceMethod<[{
@ -50,7 +50,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
        attribute.
      }], "::mlir::Attribute", "createObject",
        (ins "::mlir::Operation *":$module,
-             "const ::llvm::SmallVector<char, 0> &":$object,
+             "const ::mlir::gpu::SerializedObject &":$object,
             "const ::mlir::gpu::TargetOptions &":$options)>
  ];
 }
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
@ -14,6 +14,7 @@
 #define MLIR_DIALECT_GPU_IR_COMPILATIONINTERFACES_H

 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/IR/Module.h"

 namespace llvm {
@ -170,6 +171,24 @@ protected:
 private:
  TypeID typeID;
 };
+
+/// This class represents a serialized object (GPU binary) with metadata (e.g.
+/// timings, logs, ...).
+class SerializedObject {
+public:
+  SerializedObject(::mlir::SmallVector<char, 0> object,
+                   DictionaryAttr metadata = {})
+      : object(std::move(object)), metadata(metadata) {}
+
+  const SmallVector<char, 0> &getObject() const { return object; }
+
+  DictionaryAttr getMetadata() const { return metadata; }
+
+private:
+  SmallVector<char, 0> object;
+  DictionaryAttr metadata;
+};
+
 } // namespace gpu
 } // namespace mlir

--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@ -157,6 +157,9 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
    Option<"ftzFlag", "ftz", "bool",
           /*default=*/"false",
           "Enable flush to zero for denormals.">,
+    Option<"compilerDiagnosticsFlag", "collect-compiler-diagnostics", "bool",
+           /*default=*/"false",
+           "Enable collection of compiler diagnostics.">,
    ListOption<"linkLibs", "l", "std::string",
           "Extra bitcode libraries paths to link to.">,
    Option<"cmdOptions", "ptxas-cmd-options", "std::string",
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@ -87,7 +87,7 @@ LogicalResult moduleSerializer(GPUModuleOp op,
    auto target = dyn_cast<gpu::TargetAttrInterface>(targetAttr);
    assert(target &&
           "Target attribute doesn't implements `TargetAttrInterface`.");
-    std::optional<SmallVector<char, 0>> serializedModule =
+    std::optional<SerializedObject> serializedModule =
        target.serializeToObject(op, targetOptions);
    if (!serializedModule) {
      op.emitError("An error happened while serializing the module.");
--- a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
@ -53,6 +53,8 @@ DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
    addFlag("fast");
  if (ftzFlag)
    addFlag("ftz");
+  if (compilerDiagnosticsFlag)
+    addFlag("collect-compiler-diagnostics");

  // Tokenize and set the optional command line options.
  if (!cmdOptions.empty()) {
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@ -59,12 +59,12 @@ namespace {
 class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                         const gpu::TargetOptions &options) const;
 };
 } // namespace
@ -232,6 +232,9 @@ public:
  /// is LLVMIR or ISA.
  std::optional<int64_t> getISAToBinaryTimeInMs();

+  /// Get the compiler log from the ISA compiler.
+  StringRef getISACompilerLog() const;
+
 private:
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

@ -253,6 +256,9 @@ private:

  /// ISA->Binary perf result.
  std::optional<int64_t> isaToBinaryTimeInMs;
+
+  /// Compiler log from ptxas or libnvptxcompiler.
+  std::string isaCompilerLog;
 };
 } // namespace

@ -285,6 +291,8 @@ std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
  return isaToBinaryTimeInMs;
 }

+StringRef NVPTXSerializer::getISACompilerLog() const { return isaCompilerLog; }
+
 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
 }
@ -484,6 +492,11 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
+
+  if (target.hasFlag("collect-compiler-diagnostics")) {
+    if (auto logBuffer = llvm::MemoryBuffer::getFile(logFile->first))
+      isaCompilerLog = (*logBuffer)->getBuffer().str();
+  }
 #define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
@ -547,7 +560,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
-      return failure();                                                  \
+      return failure();                                                        \
    }                                                                          \
  } while (false)

@ -559,7 +572,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
    if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                     << nvFatbinGetErrorString(result);                        \
-      return failure();                                                  \
+      return failure();                                                        \
    }                                                                          \
  } while (false)

@ -611,21 +624,32 @@ NVPTXSerializer::compileToBinaryNVPTX(StringRef ptxCode) {
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

+  // Lambda to fetch info log; returns empty vector on failure or no log.
+  auto fetchInfoLog = [&]() -> SmallVector<char> {
+    size_t size = 0;
+    if (nvPTXCompilerGetInfoLogSize(compiler, &size) != NVPTXCOMPILE_SUCCESS ||
+        size == 0)
+      return {};
+    SmallVector<char> log(size + 1, 0);
+    if (nvPTXCompilerGetInfoLog(compiler, log.data()) != NVPTXCOMPILE_SUCCESS)
+      return {};
+    return log;
+  };
+
+  if (target.hasFlag("collect-compiler-diagnostics")) {
+    if (auto log = fetchInfoLog(); !log.empty())
+      isaCompilerLog = log.data();
+  }
+
 // Dump the log of the compiler, helpful if the verbose flag was passed.
 #define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
-    RETURN_ON_NVPTXCOMPILER_ERROR(
-        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
-    if (logSize != 0) {
-      SmallVector<char> log(logSize + 1, 0);
-      RETURN_ON_NVPTXCOMPILER_ERROR(
-          nvPTXCompilerGetInfoLog(compiler, log.data()));
+    if (auto log = fetchInfoLog(); !log.empty())
      LDBG() << "NVPTX compiler invocation for module: "
             << getOperation().getNameAttr()
             << "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")
             << "\nOutput\n"
             << log.data();
-    }
  });
 #undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
@ -725,7 +749,7 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
  return result;
 }

-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                      const gpu::TargetOptions &options) const {
  Builder builder(attribute.getContext());
@ -739,26 +763,38 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
  serializer.init();
  std::optional<SmallVector<char, 0>> result = serializer.run();
+  if (!result)
+    return std::nullopt;
+
+  SmallVector<NamedAttribute, 4> properties;
  auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
  if (llvmToISATimeInMs.has_value())
-    module->setAttr("LLVMIRToISATimeInMs",
-                    builder.getI64IntegerAttr(*llvmToISATimeInMs));
+    properties.push_back(builder.getNamedAttr(
+        "LLVMIRToISATimeInMs", builder.getI64IntegerAttr(*llvmToISATimeInMs)));
  auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
  if (isaToBinaryTimeInMs.has_value())
-    module->setAttr("ISAToBinaryTimeInMs",
-                    builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
-  return result;
+    properties.push_back(
+        builder.getNamedAttr("ISAToBinaryTimeInMs",
+                             builder.getI64IntegerAttr(*isaToBinaryTimeInMs)));
+  StringRef isaCompilerLog = serializer.getISACompilerLog();
+  if (!isaCompilerLog.empty())
+    properties.push_back(builder.getNamedAttr(
+        "ISACompilerLog", builder.getStringAttr(isaCompilerLog)));
+
+  return gpu::SerializedObject{std::move(*result),
+                               builder.getDictionaryAttr(properties)};
 }

 Attribute
 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                 const SmallVector<char, 0> &object,
+                                 const mlir::gpu::SerializedObject &object,
                                 const gpu::TargetOptions &options) const {
  auto target = cast<NVVMTargetAttr>(attribute);
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
-  SmallVector<NamedAttribute, 4> properties;
+  SmallVector<NamedAttribute> properties =
+      llvm::to_vector(object.getMetadata().getValue());
  if (format == gpu::CompilationTarget::Assembly)
    properties.push_back(
        builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@ -767,19 +803,12 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
    properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                              builder.getStringAttr(section)));

-  for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
-    if (module->hasAttr(perfName)) {
-      IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
-      properties.push_back(builder.getNamedAttr(
-          perfName, builder.getI64IntegerAttr(attr.getInt())));
-    }
-  }
-
  if (!properties.empty())
    objectProps = builder.getDictionaryAttr(properties);

  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
      objectProps, /*kernels=*/nullptr);
 }
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@ -55,12 +55,12 @@ namespace {
 class ROCDLTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                         const gpu::TargetOptions &options) const;
 };
 } // namespace
@ -473,7 +473,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
 }
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS

-std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
+std::optional<mlir::gpu::SerializedObject>
+ROCDLTargetAttrImpl::serializeToObject(
    Attribute attribute, Operation *module,
    const gpu::TargetOptions &options) const {
  assert(module && "The module must be non null.");
@ -487,7 +488,10 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
  AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
                              options);
  serializer.init();
-  return serializer.run();
+  std::optional<SmallVector<char, 0>> binary = serializer.run();
+  if (!binary)
+    return std::nullopt;
+  return gpu::SerializedObject{std::move(*binary)};
 #else
  module->emitError("the `AMDGPU` target was not built. Please enable it when "
                    "building LLVM");
@ -497,7 +501,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(

 Attribute
 ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                  const SmallVector<char, 0> &object,
+                                  const mlir::gpu::SerializedObject &object,
                                  const gpu::TargetOptions &options) const {
  gpu::CompilationTarget format = options.getCompilationTarget();
  // If format is `fatbin` transform it to binary as `fatbin` is not yet
@ -505,12 +509,12 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
  gpu::KernelTableAttr kernels;
  if (format > gpu::CompilationTarget::Binary) {
    format = gpu::CompilationTarget::Binary;
-    kernels = ROCDL::getKernelMetadata(module, object);
+    kernels = ROCDL::getKernelMetadata(module, object.getObject());
  }
  DictionaryAttr properties{};
  Builder builder(attribute.getContext());
-  StringAttr objectStr =
-      builder.getStringAttr(StringRef(object.data(), object.size()));
+  StringAttr objectStr = builder.getStringAttr(
+      StringRef(object.getObject().data(), object.getObject().size()));
  return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
                                          properties, kernels);
 }
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@ -51,12 +51,12 @@ namespace {
 class XeVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<XeVMTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                         const gpu::TargetOptions &options) const;
 };
 } // namespace
@ -354,7 +354,7 @@ SPIRVSerializer::translateToSPIRVBinary(llvm::Module &llvmModule,
  return targetISA;
 }

-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                      const gpu::TargetOptions &options) const {
  if (!module)
@ -383,7 +383,10 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                      "without having the target built.");
 #endif

-    return serializer.run();
+    std::optional<SmallVector<char, 0>> binary = serializer.run();
+    if (!binary)
+      return std::nullopt;
+    return gpu::SerializedObject{std::move(*binary)};
  }
  module->emitError("Unsupported XeVM target triple: ") << xeTarget.getTriple();
  return std::nullopt;
@ -391,7 +394,7 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,

 Attribute
 XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                 const SmallVector<char, 0> &object,
+                                 const mlir::gpu::SerializedObject &object,
                                 const gpu::TargetOptions &options) const {
  Builder builder(attribute.getContext());
  gpu::CompilationTarget format = options.getCompilationTarget();
@ -407,6 +410,7 @@ XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,

  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
      objectProps, /*kernels=*/nullptr);
 }
--- a/mlir/lib/Target/SPIRV/Target.cpp
+++ b/mlir/lib/Target/SPIRV/Target.cpp
@ -30,12 +30,12 @@ namespace {
 class SPIRVTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<SPIRVTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                         const gpu::TargetOptions &options) const;
 };
 } // namespace
@ -56,7 +56,8 @@ void mlir::spirv::registerSPIRVTargetInterfaceExternalModels(
 }

 // Reuse from existing serializer
-std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
+std::optional<mlir::gpu::SerializedObject>
+SPIRVTargetAttrImpl::serializeToObject(
    Attribute attribute, Operation *module,
    const gpu::TargetOptions &options) const {
  if (!module)
@ -84,19 +85,20 @@ std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
  std::memcpy(spvData.data(), spvBinary.data(), spvData.size());

  spvMod.erase();
-  return spvData;
+  return gpu::SerializedObject{std::move(spvData)};
 }

 // Prepare Attribute for gpu.binary with serialized kernel object
 Attribute
 SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                  const SmallVector<char, 0> &object,
+                                  const mlir::gpu::SerializedObject &object,
                                  const gpu::TargetOptions &options) const {
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
      objectProps, /*kernels=*/nullptr);
 }
--- a/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
+++ b/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
@ -1,12 +1,14 @@
 // RUN: mlir-opt %s --nvvm-attach-target="" | FileCheck %s
 // RUN: mlir-opt %s --nvvm-attach-target="ptxas-cmd-options=--register-usage-level=8" | FileCheck %s -check-prefix=CHECK-OPTIONS
 // RUN: mlir-opt %s --nvvm-attach-target="verify-target-arch=false" | FileCheck %s -check-prefix=CHECK-DISABLE-VERIFYTARGET
+// RUN: mlir-opt %s --nvvm-attach-target="collect-compiler-diagnostics=true" | FileCheck %s -check-prefix=CHECK-DIAG

 module attributes {gpu.container_module} {
    // CHECK-LABEL:gpu.module @kernel_module1
    // CHECK: [#nvvm.target]
    // CHECK-OPTIONS: [#nvvm.target<flags = {"ptxas-cmd-options" = ["--register-usage-level=8"]}>]
    // CHECK-DISABLE-VERIFYTARGET: [#nvvm.target<verifyTarget = false>]
+    // CHECK-DIAG: [#nvvm.target<flags = {"collect-compiler-diagnostics"}>]
    gpu.module @kernel_module1 {
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
--- a/mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir
+++ b/mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir
@ -0,0 +1,14 @@
+// RUN: mlir-opt %s --gpu-module-to-binary="format=%gpu_compilation_format opts=--verbose" \
+// RUN:   | FileCheck %s
+
+module attributes {gpu.container_module} {
+  // CHECK-LABEL: gpu.binary @kernel_module
+  // CHECK: properties = {{{.*}}ISACompilerLog = {{.*}}
+  gpu.module @kernel_module [#nvvm.target<chip = "sm_70", flags = {"collect-compiler-diagnostics"}>] {
+    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
+        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
+        %arg5: i64) attributes {gpu.kernel} {
+      llvm.return
+    }
+  }
+}
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@ -86,15 +86,16 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());

    // Read the serialized module.
-    llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
-                                 "module");
+    llvm::MemoryBufferRef buffer(
+        StringRef(object->getObject().data(), object->getObject().size()),
+        "module");
    llvm::LLVMContext llvmContext;
    llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
        llvm::getLazyBitcodeModule(buffer, llvmContext);
@ -122,15 +123,18 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());

    ASSERT_TRUE(
-        StringRef(object->data(), object->size()).contains("nvvm_kernel"));
-    ASSERT_TRUE(StringRef(object->data(), object->size()).count('\0') == 0);
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains("nvvm_kernel"));
+    ASSERT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .count('\0') == 0);
  }
 }

@ -153,11 +157,11 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
  }
 }

@ -203,11 +207,11 @@ TEST_F(MLIRTargetLLVMNVVM,
                             optimizedCallback, isaCallback);

  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);

    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
    ASSERT_TRUE(!initialLLVMIR.empty());
    ASSERT_TRUE(!linkedLLVMIR.empty());
    ASSERT_TRUE(!optimizedLLVMIR.empty());
@ -275,7 +279,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
                             gpu::CompilationTarget::Assembly, {}, {},
                             linkedCallback);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);

    // Verify that we correctly linked in the library: the external call is
@ -294,6 +298,6 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
      ASSERT_FALSE(bar->empty());
    }
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
  }
 }
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@ -85,15 +85,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToLLVM)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());

    // Read the serialized module.
-    llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
-                                 "module");
+    llvm::MemoryBufferRef buffer(
+        StringRef(object->getObject().data(), object->getObject().size()),
+        "module");
    llvm::LLVMContext llvmContext;
    llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
        llvm::getLazyBitcodeModule(buffer, llvmContext);
@ -121,11 +122,12 @@ TEST_F(MLIRTargetLLVMROCDL,
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
-    EXPECT_TRUE(StringRef(object->data(), object->size())
-                    .contains(".amdhsa_code_object_version 6"));
+    EXPECT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains(".amdhsa_code_object_version 6"));
  }
 }

@ -147,11 +149,12 @@ TEST_F(MLIRTargetLLVMROCDL,
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
-    EXPECT_TRUE(StringRef(object->data(), object->size())
-                    .contains(".amdhsa_code_object_version 4"));
+    EXPECT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains(".amdhsa_code_object_version 4"));
  }
 }

@ -171,14 +174,15 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());

    ASSERT_TRUE(
-        StringRef(object->data(), object->size()).contains("rocdl_kernel"));
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains("rocdl_kernel"));
  }
 }

@ -201,11 +205,11 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_FALSE(object->empty());
+    ASSERT_FALSE(object->getObject().empty());
  }
 }

@ -245,16 +249,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
-    ASSERT_FALSE(object->empty());
+    ASSERT_FALSE(object->getObject().empty());
    if (!object)
      continue;
    // Get the metadata.
    gpu::KernelTableAttr metadata =
-        ROCDL::getKernelMetadata(gpuModule, *object);
+        ROCDL::getKernelMetadata(gpuModule, object->getObject());
    ASSERT_TRUE(metadata != nullptr);
    // There should be 4 kernels.
    ASSERT_TRUE(metadata.size() == 4);
--- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
@ -37,12 +37,12 @@ namespace {
 class TargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<TargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                         const gpu::TargetOptions &options) const;
 };
 } // namespace
@ -82,13 +82,15 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
  std::string targetTriple = llvm::sys::getProcessTriple();
  LLVM::ModuleToObject serializer(*(module->getOperation()), targetTriple, "",
                                  "");
-  std::optional<SmallVector<char, 0>> serializedModule = serializer.run();
+  std::optional<mlir::gpu::SerializedObject> serializedModule =
+      serializer.run();
  ASSERT_TRUE(!!serializedModule);
-  ASSERT_TRUE(!serializedModule->empty());
+  ASSERT_TRUE(!serializedModule->getObject().empty());

  // Read the serialized module.
-  llvm::MemoryBufferRef buffer(
-      StringRef(serializedModule->data(), serializedModule->size()), "module");
+  llvm::MemoryBufferRef buffer(StringRef(serializedModule->getObject().data(),
+                                         serializedModule->getObject().size()),
+                               "module");
  llvm::LLVMContext llvmContext;
  llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
      llvm::getLazyBitcodeModule(buffer, llvmContext);
@ -99,7 +101,7 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
  ASSERT_TRUE((*llvmModule)->getFunction("foo") != nullptr);
 }

-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                  const gpu::TargetOptions &options) const {
  // Set a dummy attr to be retrieved by `createObject`.
@ -113,14 +115,15 @@ TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,

 Attribute
 TargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                             const SmallVector<char, 0> &object,
+                             const mlir::gpu::SerializedObject &object,
                             const gpu::TargetOptions &options) const {
  // Create a GPU object with the GPU module dictionary as the object
  // properties.
  return gpu::ObjectAttr::get(
      module->getContext(), attribute, gpu::CompilationTarget::Offload,
-      StringAttr::get(module->getContext(),
-                      StringRef(object.data(), object.size())),
+      StringAttr::get(
+          module->getContext(),
+          StringRef(object.getObject().data(), object.getObject().size())),
      module->getAttrDictionary(), /*kernels=*/nullptr);
 }

@ -140,11 +143,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {
  // Check the attribute holds the interface.
  ASSERT_TRUE(!!targetAttr);
  gpu::TargetOptions opts;
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
      targetAttr.serializeToObject(*module, opts);
  // Check the serialized string.
  ASSERT_TRUE(!!serializedBinary);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
  // Create the object attribute.
  auto object = cast<gpu::ObjectAttr>(
      targetAttr.createObject(*module, *serializedBinary, opts));
@ -176,11 +179,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithInitialLLVMIR)) {
  gpu::TargetOptions opts(
      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
      {}, initialCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
      targetAttr.serializeToObject(*module, opts);

  ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
  ASSERT_TRUE(!initialLLVMIR.empty());
 }

@ -204,11 +207,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithLinkedLLVMIR)) {
  gpu::TargetOptions opts(
      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
      {}, {}, linkedCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
      targetAttr.serializeToObject(*module, opts);

  ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
  ASSERT_TRUE(!linkedLLVMIR.empty());
 }

@ -233,10 +236,10 @@ TEST_F(MLIRTargetLLVM,
  gpu::TargetOptions opts(
      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
      {}, {}, {}, optimizedCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
      targetAttr.serializeToObject(*module, opts);

  ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
  ASSERT_TRUE(!optimizedLLVMIR.empty());
 }