From fbffdaa174f04ce88e97b420a549d2c8d2a0b102 Mon Sep 17 00:00:00 2001
From: Zichen Lu <mikaovo2000@gmail.com>
Date: Fri, 30 Jan 2026 19:56:20 +0800
Subject: [PATCH] [MLIR][GPU] Update serializeToObject to use SerializedObject
 wrapper and include ISA compiler logs (#176697)

This PR makes the compilation log from the ISA compiler available to
users by returning it as part of the `gpu::ObjectAttr` properties,
following the same pattern as the existing `LLVMIRToISATimeInMs`.

Currently, the compiler log (which contains useful information such as
spill statistics when --verbose is passed) is only accessible in debug
builds via `LLVM_DEBUG`. However, there are good reasons to make this
information available in release builds as well:

1. Both `ptxas` and `libnvptxcompiler` are publicly available
tools/libraries distributed with the CUDA Toolkit. The `--verbose` flag
and its output are documented public features, not internal debug
information.
2. The verbose output provides valuable insights for users.

A new `SerializedObject` class is used to carry the metadata alongside
the binary when returning from `serializeToObject`.
---
 .../GPU/IR/CompilationAttrInterfaces.td       |  4 +-
 .../Dialect/GPU/IR/CompilationInterfaces.h    | 19 +++++
 .../mlir/Dialect/GPU/Transforms/Passes.td     |  3 +
 .../Dialect/GPU/Transforms/ModuleToBinary.cpp |  2 +-
 .../GPU/Transforms/NVVMAttachTarget.cpp       |  2 +
 mlir/lib/Target/LLVM/NVVM/Target.cpp          | 85 +++++++++++++------
 mlir/lib/Target/LLVM/ROCDL/Target.cpp         | 20 +++--
 mlir/lib/Target/LLVM/XeVM/Target.cpp          | 16 ++--
 mlir/lib/Target/SPIRV/Target.cpp              | 14 +--
 mlir/test/Dialect/GPU/nvvm-attach-target.mlir |  2 +
 .../CUDA/module-to-binary-compiler-log.mlir   | 14 +++
 .../Target/LLVM/SerializeNVVMTarget.cpp       | 32 ++++---
 .../Target/LLVM/SerializeROCDLTarget.cpp      | 40 +++++----
 .../Target/LLVM/SerializeToLLVMBitcode.cpp    | 39 +++++----
 14 files changed, 191 insertions(+), 101 deletions(-)
 create mode 100644 mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir
diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td
index 018821f16c3a..ec7468008ce3 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td
@@ -37,7 +37,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
         is meant to be used for passing additional options that are not in the
         attribute.
       }],
-      "std::optional<::mlir::SmallVector<char, 0>>", "serializeToObject",
+      "std::optional<SerializedObject>", "serializeToObject",
       (ins "::mlir::Operation*":$module,
            "const ::mlir::gpu::TargetOptions&":$options)>,
     InterfaceMethod<[{
@@ -50,7 +50,7 @@ def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> {
         attribute.
       }], "::mlir::Attribute", "createObject",
         (ins "::mlir::Operation *":$module,
-             "const ::llvm::SmallVector<char, 0> &":$object,
+             "const ::mlir::gpu::SerializedObject &":$object,
              "const ::mlir::gpu::TargetOptions &":$options)>
   ];
 }
diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
index 139360f8bd3f..c6b5f7070b17 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
@@ -14,6 +14,7 @@
 #define MLIR_DIALECT_GPU_IR_COMPILATIONINTERFACES_H
 
 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/IR/Module.h"
 
 namespace llvm {
@@ -170,6 +171,24 @@ protected:
 private:
   TypeID typeID;
 };
+
+/// A serialized GPU object: the binary blob together with a metadata
+/// dictionary (e.g. timings, logs); the dictionary defaults to `{}`.
+class SerializedObject {
+public:
+  SerializedObject(::mlir::SmallVector<char, 0> object,
+                   DictionaryAttr metadata = {})
+      : object(std::move(object)), metadata(metadata) {}
+
+  const SmallVector<char, 0> &getObject() const { return object; }
+
+  DictionaryAttr getMetadata() const { return metadata; }
+
+private:
+  SmallVector<char, 0> object;
+  DictionaryAttr metadata;
+};
+
 } // namespace gpu
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index f3c2b9ad830f..93c19f41a9c4 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -157,6 +157,9 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
     Option<"ftzFlag", "ftz", "bool",
            /*default=*/"false",
            "Enable flush to zero for denormals.">,
+    Option<"compilerDiagnosticsFlag", "collect-compiler-diagnostics", "bool",
+           /*default=*/"false",
+           "Enable collection of compiler diagnostics.">,
     ListOption<"linkLibs", "l", "std::string",
            "Extra bitcode libraries paths to link to.">,
     Option<"cmdOptions", "ptxas-cmd-options", "std::string",
diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index 95d5cadbd4e1..e359b8620b4e 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -87,7 +87,7 @@ LogicalResult moduleSerializer(GPUModuleOp op,
     auto target = dyn_cast<gpu::TargetAttrInterface>(targetAttr);
     assert(target &&
            "Target attribute doesn't implements `TargetAttrInterface`.");
-    std::optional<SmallVector<char, 0>> serializedModule =
+    std::optional<SerializedObject> serializedModule =
         target.serializeToObject(op, targetOptions);
     if (!serializedModule) {
       op.emitError("An error happened while serializing the module.");
diff --git a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
index a28237913ce2..daccb86e23f6 100644
--- a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
@@ -53,6 +53,8 @@ DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
     addFlag("fast");
   if (ftzFlag)
     addFlag("ftz");
+  if (compilerDiagnosticsFlag)
+    addFlag("collect-compiler-diagnostics");
 
   // Tokenize and set the optional command line options.
   if (!cmdOptions.empty()) {
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index 33a246917d2e..2d197a162a7a 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -59,12 +59,12 @@ namespace {
 class NVVMTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;
 
   Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace
@@ -232,6 +232,9 @@ public:
   /// is LLVMIR or ISA.
   std::optional<int64_t> getISAToBinaryTimeInMs();
 
+  /// Get the compiler log from ISA compiler.
+  StringRef getISACompilerLog() const;
+
 private:
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
 
@@ -253,6 +256,9 @@ private:
 
   /// ISA->Binary perf result.
   std::optional<int64_t> isaToBinaryTimeInMs;
+
+  /// Compiler log from ptxas or libnvptxcompiler.
+  std::string isaCompilerLog;
 };
 } // namespace
 
@@ -285,6 +291,8 @@ std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
   return isaToBinaryTimeInMs;
 }
 
+StringRef NVPTXSerializer::getISACompilerLog() const { return isaCompilerLog; }
+
 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
   return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
 }
@@ -484,6 +492,11 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
                                 /*MemoryLimit=*/0,
                                 /*ErrMsg=*/&message))
     return emitLogError("`ptxas`");
+
+  if (target.hasFlag("collect-compiler-diagnostics")) {
+    if (auto logBuffer = llvm::MemoryBuffer::getFile(logFile->first))
+      isaCompilerLog = (*logBuffer)->getBuffer().str();
+  }
 #define DEBUG_TYPE "dump-sass"
   LLVM_DEBUG({
     std::optional<std::string> nvdisasm = findTool("nvdisasm");
@@ -547,7 +560,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
     if (auto status = (expr)) {                                                \
       emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                      << status;                                                \
-      return failure();                                                  \
+      return failure();                                                        \
     }                                                                          \
   } while (false)
 
@@ -559,7 +572,7 @@ NVPTXSerializer::compileToBinary(StringRef ptxCode) {
     if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
       emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                      << nvFatbinGetErrorString(result);                        \
-      return failure();                                                  \
+      return failure();                                                        \
     }                                                                          \
   } while (false)
 
@@ -611,21 +624,32 @@ NVPTXSerializer::compileToBinaryNVPTX(StringRef ptxCode) {
   RETURN_ON_NVPTXCOMPILER_ERROR(
       nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
 
+  // Lambda to fetch info log; returns empty vector on failure or no log.
+  auto fetchInfoLog = [&]() -> SmallVector<char> {
+    size_t size = 0;
+    if (nvPTXCompilerGetInfoLogSize(compiler, &size) != NVPTXCOMPILE_SUCCESS ||
+        size == 0)
+      return {};
+    SmallVector<char> log(size + 1, 0);
+    if (nvPTXCompilerGetInfoLog(compiler, log.data()) != NVPTXCOMPILE_SUCCESS)
+      return {};
+    return log;
+  };
+
+  if (target.hasFlag("collect-compiler-diagnostics")) {
+    if (auto log = fetchInfoLog(); !log.empty())
+      isaCompilerLog = log.data();
+  }
+
 // Dump the log of the compiler, helpful if the verbose flag was passed.
 #define DEBUG_TYPE "serialize-to-binary"
   LLVM_DEBUG({
-    RETURN_ON_NVPTXCOMPILER_ERROR(
-        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
-    if (logSize != 0) {
-      SmallVector<char> log(logSize + 1, 0);
-      RETURN_ON_NVPTXCOMPILER_ERROR(
-          nvPTXCompilerGetInfoLog(compiler, log.data()));
+    if (auto log = fetchInfoLog(); !log.empty())
       LDBG() << "NVPTX compiler invocation for module: "
              << getOperation().getNameAttr()
              << "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")
              << "\nOutput\n"
              << log.data();
-    }
   });
 #undef DEBUG_TYPE
   RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
@@ -725,7 +749,7 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
   return result;
 }
 
-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                       const gpu::TargetOptions &options) const {
   Builder builder(attribute.getContext());
@@ -739,26 +763,38 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
   NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
   serializer.init();
   std::optional<SmallVector<char, 0>> result = serializer.run();
+  if (!result)
+    return std::nullopt;
+
+  SmallVector<NamedAttribute, 4> properties;
   auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
   if (llvmToISATimeInMs.has_value())
-    module->setAttr("LLVMIRToISATimeInMs",
-                    builder.getI64IntegerAttr(*llvmToISATimeInMs));
+    properties.push_back(builder.getNamedAttr(
+        "LLVMIRToISATimeInMs", builder.getI64IntegerAttr(*llvmToISATimeInMs)));
   auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
   if (isaToBinaryTimeInMs.has_value())
-    module->setAttr("ISAToBinaryTimeInMs",
-                    builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
-  return result;
+    properties.push_back(
+        builder.getNamedAttr("ISAToBinaryTimeInMs",
+                             builder.getI64IntegerAttr(*isaToBinaryTimeInMs)));
+  StringRef isaCompilerLog = serializer.getISACompilerLog();
+  if (!isaCompilerLog.empty())
+    properties.push_back(builder.getNamedAttr(
+        "ISACompilerLog", builder.getStringAttr(isaCompilerLog)));
+
+  return gpu::SerializedObject{std::move(*result),
+                               builder.getDictionaryAttr(properties)};
 }
 
 Attribute
 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                 const SmallVector<char, 0> &object,
+                                 const mlir::gpu::SerializedObject &object,
                                  const gpu::TargetOptions &options) const {
   auto target = cast<NVVMTargetAttr>(attribute);
   gpu::CompilationTarget format = options.getCompilationTarget();
   DictionaryAttr objectProps;
   Builder builder(attribute.getContext());
-  SmallVector<NamedAttribute, 4> properties;
+  SmallVector<NamedAttribute> properties =
+      llvm::to_vector(object.getMetadata().getValue());
   if (format == gpu::CompilationTarget::Assembly)
     properties.push_back(
         builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@@ -767,19 +803,12 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
     properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                               builder.getStringAttr(section)));
 
-  for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
-    if (module->hasAttr(perfName)) {
-      IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
-      properties.push_back(builder.getNamedAttr(
-          perfName, builder.getI64IntegerAttr(attr.getInt())));
-    }
-  }
-
   if (!properties.empty())
     objectProps = builder.getDictionaryAttr(properties);
 
   return builder.getAttr<gpu::ObjectAttr>(
       attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
       objectProps, /*kernels=*/nullptr);
 }
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index 1af7eabfb4b1..60962efca829 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -55,12 +55,12 @@ namespace {
 class ROCDLTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;
 
   Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace
@@ -473,7 +473,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
 }
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
 
-std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
+std::optional<mlir::gpu::SerializedObject>
+ROCDLTargetAttrImpl::serializeToObject(
     Attribute attribute, Operation *module,
     const gpu::TargetOptions &options) const {
   assert(module && "The module must be non null.");
@@ -487,7 +488,10 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
   AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
                               options);
   serializer.init();
-  return serializer.run();
+  std::optional<SmallVector<char, 0>> binary = serializer.run();
+  if (!binary)
+    return std::nullopt;
+  return gpu::SerializedObject{std::move(*binary)};
 #else
   module->emitError("the `AMDGPU` target was not built. Please enable it when "
                     "building LLVM");
@@ -497,7 +501,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
 
 Attribute
 ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                  const SmallVector<char, 0> &object,
+                                  const mlir::gpu::SerializedObject &object,
                                   const gpu::TargetOptions &options) const {
   gpu::CompilationTarget format = options.getCompilationTarget();
   // If format is `fatbin` transform it to binary as `fatbin` is not yet
@@ -505,12 +509,12 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
   gpu::KernelTableAttr kernels;
   if (format > gpu::CompilationTarget::Binary) {
     format = gpu::CompilationTarget::Binary;
-    kernels = ROCDL::getKernelMetadata(module, object);
+    kernels = ROCDL::getKernelMetadata(module, object.getObject());
   }
   DictionaryAttr properties{};
   Builder builder(attribute.getContext());
-  StringAttr objectStr =
-      builder.getStringAttr(StringRef(object.data(), object.size()));
+  StringAttr objectStr = builder.getStringAttr(
+      StringRef(object.getObject().data(), object.getObject().size()));
   return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
                                           properties, kernels);
 }
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 3ea13bdd3ea6..4285a4e37bec 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -51,12 +51,12 @@ namespace {
 class XeVMTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<XeVMTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;
 
   Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace
@@ -354,7 +354,7 @@ SPIRVSerializer::translateToSPIRVBinary(llvm::Module &llvmModule,
   return targetISA;
 }
 
-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                       const gpu::TargetOptions &options) const {
   if (!module)
@@ -383,7 +383,10 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                       "without having the target built.");
 #endif
 
-    return serializer.run();
+    std::optional<SmallVector<char, 0>> binary = serializer.run();
+    if (!binary)
+      return std::nullopt;
+    return gpu::SerializedObject{std::move(*binary)};
   }
   module->emitError("Unsupported XeVM target triple: ") << xeTarget.getTriple();
   return std::nullopt;
@@ -391,7 +394,7 @@ XeVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
 
 Attribute
 XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                 const SmallVector<char, 0> &object,
+                                 const mlir::gpu::SerializedObject &object,
                                  const gpu::TargetOptions &options) const {
   Builder builder(attribute.getContext());
   gpu::CompilationTarget format = options.getCompilationTarget();
@@ -407,6 +410,7 @@ XeVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
 
   return builder.getAttr<gpu::ObjectAttr>(
       attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
       objectProps, /*kernels=*/nullptr);
 }
diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp
index dd128e254aa0..be589b829287 100644
--- a/mlir/lib/Target/SPIRV/Target.cpp
+++ b/mlir/lib/Target/SPIRV/Target.cpp
@@ -30,12 +30,12 @@ namespace {
 class SPIRVTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<SPIRVTargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;
 
   Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace
@@ -56,7 +56,8 @@ void mlir::spirv::registerSPIRVTargetInterfaceExternalModels(
 }
 
 // Reuse from existing serializer
-std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
+std::optional<mlir::gpu::SerializedObject>
+SPIRVTargetAttrImpl::serializeToObject(
     Attribute attribute, Operation *module,
     const gpu::TargetOptions &options) const {
   if (!module)
@@ -84,19 +85,20 @@ std::optional<SmallVector<char, 0>> SPIRVTargetAttrImpl::serializeToObject(
   std::memcpy(spvData.data(), spvBinary.data(), spvData.size());
 
   spvMod.erase();
-  return spvData;
+  return gpu::SerializedObject{std::move(spvData)};
 }
 
 // Prepare Attribute for gpu.binary with serialized kernel object
 Attribute
 SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                                  const SmallVector<char, 0> &object,
+                                  const mlir::gpu::SerializedObject &object,
                                   const gpu::TargetOptions &options) const {
   gpu::CompilationTarget format = options.getCompilationTarget();
   DictionaryAttr objectProps;
   Builder builder(attribute.getContext());
   return builder.getAttr<gpu::ObjectAttr>(
       attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
+      builder.getStringAttr(
+          StringRef(object.getObject().data(), object.getObject().size())),
       objectProps, /*kernels=*/nullptr);
 }
diff --git a/mlir/test/Dialect/GPU/nvvm-attach-target.mlir b/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
index baa3ae58dda1..f5febc7166d4 100644
--- a/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
+++ b/mlir/test/Dialect/GPU/nvvm-attach-target.mlir
@@ -1,12 +1,14 @@
 // RUN: mlir-opt %s --nvvm-attach-target="" | FileCheck %s
 // RUN: mlir-opt %s --nvvm-attach-target="ptxas-cmd-options=--register-usage-level=8" | FileCheck %s -check-prefix=CHECK-OPTIONS
 // RUN: mlir-opt %s --nvvm-attach-target="verify-target-arch=false" | FileCheck %s -check-prefix=CHECK-DISABLE-VERIFYTARGET
+// RUN: mlir-opt %s --nvvm-attach-target="collect-compiler-diagnostics=true" | FileCheck %s -check-prefix=CHECK-DIAG
 
 module attributes {gpu.container_module} {
     // CHECK-LABEL:gpu.module @kernel_module1
     // CHECK: [#nvvm.target]
     // CHECK-OPTIONS: [#nvvm.target<flags = {"ptxas-cmd-options" = ["--register-usage-level=8"]}>]
     // CHECK-DISABLE-VERIFYTARGET: [#nvvm.target<verifyTarget = false>]
+    // CHECK-DIAG: [#nvvm.target<flags = {"collect-compiler-diagnostics"}>]
     gpu.module @kernel_module1 {
     llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
         %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
diff --git a/mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir b/mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir
new file mode 100644
index 000000000000..a5c89b5704c8
--- /dev/null
+++ b/mlir/test/Integration/GPU/CUDA/module-to-binary-compiler-log.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt %s --gpu-module-to-binary="format=%gpu_compilation_format opts=--verbose" \
+// RUN:   | FileCheck %s
+
+module attributes {gpu.container_module} {
+  // CHECK-LABEL: gpu.binary @kernel_module
+  // CHECK: properties = {{{.*}}ISACompilerLog = {{.*}}
+  gpu.module @kernel_module [#nvvm.target<chip = "sm_70", flags = {"collect-compiler-diagnostics"}>] {
+    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
+        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
+        %arg5: i64) attributes {gpu.kernel} {
+      llvm.return
+    }
+  }
+}
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
index af0af89c7d07..31c5aa3195db 100644
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -86,15 +86,16 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
 
     // Read the serialized module.
-    llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
-                                 "module");
+    llvm::MemoryBufferRef buffer(
+        StringRef(object->getObject().data(), object->getObject().size()),
+        "module");
     llvm::LLVMContext llvmContext;
     llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
         llvm::getLazyBitcodeModule(buffer, llvmContext);
@@ -122,15 +123,18 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
 
     ASSERT_TRUE(
-        StringRef(object->data(), object->size()).contains("nvvm_kernel"));
-    ASSERT_TRUE(StringRef(object->data(), object->size()).count('\0') == 0);
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains("nvvm_kernel"));
+    ASSERT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .count('\0') == 0);
   }
 }
 
@@ -153,11 +157,11 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
   }
 }
 
@@ -203,11 +207,11 @@ TEST_F(MLIRTargetLLVMNVVM,
                              optimizedCallback, isaCallback);
 
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
 
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
     ASSERT_TRUE(!initialLLVMIR.empty());
     ASSERT_TRUE(!linkedLLVMIR.empty());
     ASSERT_TRUE(!optimizedLLVMIR.empty());
@@ -275,7 +279,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
                              gpu::CompilationTarget::Assembly, {}, {},
                              linkedCallback);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
 
     // Verify that we correctly linked in the library: the external call is
@@ -294,6 +298,6 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
       ASSERT_FALSE(bar->empty());
     }
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
   }
 }
diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
index a015e1d7dde6..3c71df76bb6a 100644
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -85,15 +85,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToLLVM)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
 
     // Read the serialized module.
-    llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
-                                 "module");
+    llvm::MemoryBufferRef buffer(
+        StringRef(object->getObject().data(), object->getObject().size()),
+        "module");
     llvm::LLVMContext llvmContext;
     llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
         llvm::getLazyBitcodeModule(buffer, llvmContext);
@@ -121,11 +122,12 @@ TEST_F(MLIRTargetLLVMROCDL,
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
-    EXPECT_TRUE(StringRef(object->data(), object->size())
-                    .contains(".amdhsa_code_object_version 6"));
+    EXPECT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains(".amdhsa_code_object_version 6"));
   }
 }
 
@@ -147,11 +149,12 @@ TEST_F(MLIRTargetLLVMROCDL,
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
-    EXPECT_TRUE(StringRef(object->data(), object->size())
-                    .contains(".amdhsa_code_object_version 4"));
+    EXPECT_TRUE(
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains(".amdhsa_code_object_version 4"));
   }
 }
 
@@ -171,14 +174,15 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(!object->getObject().empty());
 
     ASSERT_TRUE(
-        StringRef(object->data(), object->size()).contains("rocdl_kernel"));
+        StringRef(object->getObject().data(), object->getObject().size())
+            .contains("rocdl_kernel"));
   }
 }
 
@@ -201,11 +205,11 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_FALSE(object->empty());
+    ASSERT_FALSE(object->getObject().empty());
   }
 }
 
@@ -245,16 +249,16 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
   ASSERT_TRUE(!!serializer);
   gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
-    std::optional<SmallVector<char, 0>> object =
+    std::optional<mlir::gpu::SerializedObject> object =
         serializer.serializeToObject(gpuModule, options);
     // Check that the serializer was successful.
     ASSERT_TRUE(object != std::nullopt);
-    ASSERT_FALSE(object->empty());
+    ASSERT_FALSE(object->getObject().empty());
     if (!object)
       continue;
     // Get the metadata.
     gpu::KernelTableAttr metadata =
-        ROCDL::getKernelMetadata(gpuModule, *object);
+        ROCDL::getKernelMetadata(gpuModule, object->getObject());
     ASSERT_TRUE(metadata != nullptr);
     // There should be 4 kernels.
     ASSERT_TRUE(metadata.size() == 4);
diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
index 3c880edee4ff..5271923d923e 100644
--- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
@@ -37,12 +37,12 @@ namespace {
 class TargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<TargetAttrImpl> {
 public:
-  std::optional<SmallVector<char, 0>>
+  std::optional<mlir::gpu::SerializedObject>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;
 
   Attribute createObject(Attribute attribute, Operation *module,
-                         const SmallVector<char, 0> &object,
+                         const mlir::gpu::SerializedObject &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace
@@ -82,13 +82,15 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
   std::string targetTriple = llvm::sys::getProcessTriple();
   LLVM::ModuleToObject serializer(*(module->getOperation()), targetTriple, "",
                                   "");
-  std::optional<SmallVector<char, 0>> serializedModule = serializer.run();
+  std::optional<mlir::gpu::SerializedObject> serializedModule =
+      serializer.run();
   ASSERT_TRUE(!!serializedModule);
-  ASSERT_TRUE(!serializedModule->empty());
+  ASSERT_TRUE(!serializedModule->getObject().empty());
 
   // Read the serialized module.
-  llvm::MemoryBufferRef buffer(
-      StringRef(serializedModule->data(), serializedModule->size()), "module");
+  llvm::MemoryBufferRef buffer(StringRef(serializedModule->getObject().data(),
+                                         serializedModule->getObject().size()),
+                               "module");
   llvm::LLVMContext llvmContext;
   llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
       llvm::getLazyBitcodeModule(buffer, llvmContext);
@@ -99,7 +101,7 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(SerializeToLLVMBitcode)) {
   ASSERT_TRUE((*llvmModule)->getFunction("foo") != nullptr);
 }
 
-std::optional<SmallVector<char, 0>>
+std::optional<mlir::gpu::SerializedObject>
 TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                   const gpu::TargetOptions &options) const {
   // Set a dummy attr to be retrieved by `createObject`.
@@ -113,14 +115,15 @@ TargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
 
 Attribute
 TargetAttrImpl::createObject(Attribute attribute, Operation *module,
-                             const SmallVector<char, 0> &object,
+                             const mlir::gpu::SerializedObject &object,
                              const gpu::TargetOptions &options) const {
   // Create a GPU object with the GPU module dictionary as the object
   // properties.
   return gpu::ObjectAttr::get(
       module->getContext(), attribute, gpu::CompilationTarget::Offload,
-      StringAttr::get(module->getContext(),
-                      StringRef(object.data(), object.size())),
+      StringAttr::get(
+          module->getContext(),
+          StringRef(object.getObject().data(), object.getObject().size())),
       module->getAttrDictionary(), /*kernels=*/nullptr);
 }
 
@@ -140,11 +143,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {
   // Check the attribute holds the interface.
   ASSERT_TRUE(!!targetAttr);
   gpu::TargetOptions opts;
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
   // Check the serialized string.
   ASSERT_TRUE(!!serializedBinary);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
   // Create the object attribute.
   auto object = cast<gpu::ObjectAttr>(
       targetAttr.createObject(*module, *serializedBinary, opts));
@@ -176,11 +179,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithInitialLLVMIR)) {
   gpu::TargetOptions opts(
       {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
       {}, initialCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
 
   ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
   ASSERT_TRUE(!initialLLVMIR.empty());
 }
 
@@ -204,11 +207,11 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithLinkedLLVMIR)) {
   gpu::TargetOptions opts(
       {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
       {}, {}, linkedCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
 
   ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
   ASSERT_TRUE(!linkedLLVMIR.empty());
 }
 
@@ -233,10 +236,10 @@ TEST_F(MLIRTargetLLVM,
   gpu::TargetOptions opts(
       {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
       {}, {}, {}, optimizedCallback);
-  std::optional<SmallVector<char, 0>> serializedBinary =
+  std::optional<mlir::gpu::SerializedObject> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
 
   ASSERT_TRUE(serializedBinary != std::nullopt);
-  ASSERT_TRUE(!serializedBinary->empty());
+  ASSERT_TRUE(!serializedBinary->getObject().empty());
   ASSERT_TRUE(!optimizedLLVMIR.empty());
 }