[mlir][func] Move return-type verification from ReturnOp to FuncOp (#184153)

Move the operand count and type checks for func.return from ReturnOp::verify() into a new FuncOp::verify(). The verifier iterates all blocks in the callable region, skipping terminators that are not func.return (e.g. llvm.return or test.return that may appear during dialect conversion). Fix several invalid-IR tests that had func.func return types inconsistent with the actual func.return operands. Previously these mismatches were silent because block verification stopped at an earlier expected error before reaching the func.return; now that FuncOp::verify() runs before body verification, the return types must be consistent.
2026-03-03 12:18:27 +01:00 · 2026-03-03 12:18:27 +01:00 · ecec7920c6
commit ecec7920c6
parent 263a22e865
18 changed files with 73 additions and 64 deletions
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
@ -357,6 +357,7 @@ def FuncOp : Func_Op<"func", [
    bool isDeclaration() { return isExternal(); }
  }];
  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
 }

 //===----------------------------------------------------------------------===//
@ -389,7 +390,6 @@ def ReturnOp : Func_Op<"return", [Pure, HasParent<"FuncOp">,
  }]>];

  let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?";
-  let hasVerifier = 1;
 }

 #endif // MLIR_DIALECT_FUNC_IR_FUNCOPS_TD
--- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp
+++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
@ -284,23 +284,37 @@ FuncOp FuncOp::clone() {
 // ReturnOp
 //===----------------------------------------------------------------------===//

-LogicalResult ReturnOp::verify() {
-  auto function = cast<FuncOp>((*this)->getParentOp());
+LogicalResult FuncOp::verify() {
+  // External declarations have no body to check.
+  if (isDeclaration())
+    return success();
+  // Hoist the result types once; they are the same for every return site.
+  auto resultTypes = getFunctionType().getResults();
+  for (Block &block : getBody()) {
+    if (block.empty())
+      continue;
+    // Check func.return or other return-like terminators ops (e.g.
+    // llvm.return, test.return).
+    auto returnOp = dyn_cast<RegionBranchTerminatorOpInterface>(&block.back());
+    if (!returnOp)
+      continue;

-  // The operand number and types must match the function signature.
-  const auto &results = function.getFunctionType().getResults();
-  if (getNumOperands() != results.size())
-    return emitOpError("has ")
-           << getNumOperands() << " operands, but enclosing function (@"
-           << function.getName() << ") returns " << results.size();
+    if (returnOp->getNumOperands() != resultTypes.size())
+      return returnOp->emitOpError("has ")
+             << returnOp->getNumOperands()
+             << " operands, but enclosing function (@" << getName()
+             << ") returns " << resultTypes.size();

-  for (unsigned i = 0, e = results.size(); i != e; ++i)
-    if (getOperand(i).getType() != results[i])
-      return emitError() << "type of return operand " << i << " ("
-                         << getOperand(i).getType()
-                         << ") doesn't match function result type ("
-                         << results[i] << ")"
-                         << " in function @" << function.getName();
+    for (auto [i, opType] :
+         llvm::enumerate(llvm::zip(returnOp->getOperandTypes(), resultTypes))) {
+      auto [opTy, resTy] = opType;
+      if (opTy != resTy)
+        return returnOp->emitError()
+               << "type of return operand " << i << " (" << opTy
+               << ") doesn't match function result type (" << resTy
+               << ") in function @" << getName();
+    }
+  }

  return success();
 }
--- a/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir
@ -87,13 +87,6 @@ func.func @check_memref_func_call(%in : memref<10xi8>) -> memref<20xi8> {
  return %res : memref<20xi8>
 }

-// BAREPTR-LABEL: func @check_return(
-// BAREPTR-SAME: %{{.*}}: memref<?xi8>) -> memref<?xi8>
-func.func @check_return(%in : memref<?xi8>) -> memref<?xi8> {
-  // BAREPTR: llvm.return {{.*}} : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-  return %in : memref<?xi8>
-}
-
 // BAREPTR-LABEL: func @unconvertible_multiresult
 // BAREPTR-SAME: %{{.*}}: memref<?xf32>, %{{.*}}: memref<?xf32>) -> (memref<?xf32>, memref<?xf32>)
 func.func @unconvertible_multiresult(%arg0: memref<?xf32> , %arg1: memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir
@ -67,7 +67,7 @@ module attributes {transform.with_named_sequence} {
        {index_bitwidth = 32, use_opaque_pointers = true}
    } {
      legal_dialects = ["llvm", "memref", "nvvm"],
-      legal_ops = ["func.func", "gpu.module", "gpu.yield"],
+      legal_ops = ["gpu.module", "gpu.yield"],
      illegal_dialects = ["gpu"],
      illegal_ops = ["llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
                    "llvm.ffloor", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow",
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@ -1031,7 +1031,7 @@ module attributes {transform.with_named_sequence} {
        use_bare_ptr_call_conv = false}
    } {
      legal_dialects = ["llvm", "memref", "nvvm", "test"],
-      legal_ops = ["func.func", "gpu.module", "gpu.yield"],
+      legal_ops = ["gpu.module", "gpu.yield"],
      illegal_dialects = ["gpu"],
      illegal_ops = ["llvm.copysign", "llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
                    "llvm.ffloor", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow",
--- a/mlir/test/Dialect/EmitC/invalid_types.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_types.mlir
@ -84,7 +84,7 @@ func.func @illegal_array_with_lvalue_element_type(

 // -----

-func.func @illegal_integer_type(%arg0: i11, %arg1: i11) -> i11 {
+func.func @illegal_integer_type(%arg0: i11, %arg1: i11) {
    // expected-error @+1 {{'emitc.mul' op operand #0 must be floating-point type supported by EmitC or integer, index or opaque type supported by EmitC, but got 'i11'}}
    %mul = "emitc.mul" (%arg0, %arg1) : (i11, i11) -> i11
    return
--- a/mlir/test/Dialect/LLVMIR/invalid.mlir
+++ b/mlir/test/Dialect/LLVMIR/invalid.mlir
@ -687,7 +687,7 @@ func.func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3
 func.func @nvvm_invalid_mma_0(%a0 : f16, %a1 : f16,
                         %b0 : vector<2xf16>, %b1 : vector<2xf16>,
                         %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
-                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
+                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> {
  // expected-error@+1 {{Could not match types for the A operands; expected one of 2xvector<2xf16> but got f16, f16}}
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7]
    {layoutA=#nvvm.mma_layout<row>, layoutB=#nvvm.mma_layout<col>, shape = #nvvm.shape<m = 8, n = 8, k = 4>} : (f16, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
@ -699,7 +699,7 @@ func.func @nvvm_invalid_mma_0(%a0 : f16, %a1 : f16,
 func.func @nvvm_invalid_mma_1(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                         %b0 : vector<2xf16>, %b1 : vector<2xf16>,
                         %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
-                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
+                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)> {
  // expected-error@+1 {{Could not match allowed types for the result; expected one of !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> but got !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)>}}
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7]
    {layoutA=#nvvm.mma_layout<row>, layoutB=#nvvm.mma_layout<col>, shape = #nvvm.shape<m = 8, n = 8, k = 4>} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)>
@ -711,7 +711,7 @@ func.func @nvvm_invalid_mma_1(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
 func.func @nvvm_invalid_mma_2(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                         %b0 : vector<2xf16>, %b1 : vector<2xf16>,
                         %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
-                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
+                         %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> {
  // expected-error@+1 {{op requires attribute 'layoutA'}}
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7]
    {shape = #nvvm.shape<m = 8, n = 8, k = 4>}: (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
@ -722,7 +722,7 @@ func.func @nvvm_invalid_mma_2(%a0 : vector<2xf16>, %a1 : vector<2xf16>,

 func.func @nvvm_invalid_mma_3(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                         %b0 : vector<2xf16>, %b1 : vector<2xf16>,
-                         %c0 : vector<2xf16>, %c1 : vector<2xf16>) {
+                         %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> {
  // expected-error@+1 {{unimplemented variant for MMA shape <8, 8, 16>}}
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1] {layoutA=#nvvm.mma_layout<row>, layoutB=#nvvm.mma_layout<col>, shape = #nvvm.shape<m = 8, n = 8, k = 16>} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
  llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
@ -732,7 +732,7 @@ func.func @nvvm_invalid_mma_3(%a0 : vector<2xf16>, %a1 : vector<2xf16>,

 func.func @nvvm_invalid_mma_8(%a0 : i32, %a1 : i32,
                               %b0 : i32,
-                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // expected-error@+1 {{op requires b1Op attribute}}
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@ -143,7 +143,7 @@ func.func @llvm_nvvm_bar_warp_sync(%mask : i32) {
 // CHECK-LABEL: @nvvm_mma_m8n8k4_row_col_f32_f32
 func.func @nvvm_mma_m8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
               %b0 : vector<2xf16>, %b1 : vector<2xf16>,
-               %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
+               %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> {
  // CHECK: nvvm.mma.sync
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -154,7 +154,7 @@ func.func @nvvm_mma_m8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf
 // CHECK-LABEL: @nvvm_mma_m8n8k4_f16_f16
 func.func @nvvm_mma_m8n8k4_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                              %b0 : vector<2xf16>, %b1 : vector<2xf16>,
-                              %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) {
+                              %c0 : vector<2xf16>, %c1 : vector<2xf16>, %c2 : vector<2xf16>, %c3 : vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> {
  // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}]
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0, %b1] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -164,7 +164,7 @@ func.func @nvvm_mma_m8n8k4_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,

 // CHECK-LABEL: @nvvm_mma_m16n8k8_bf16_bf16
 func.func @nvvm_mma_m16n8k8_bf16_bf16(%a0 : i32, %a1 : i32, %b0 : i32,
-                              %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) {
+                              %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}] {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<bf16>, multiplicandBPtxType = #nvvm.mma_type<bf16>, shape = #nvvm.shape<m = 16, n = 8, k = 8>} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -176,7 +176,7 @@ func.func @nvvm_mma_m16n8k8_bf16_bf16(%a0 : i32, %a1 : i32, %b0 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k16_bf16_bf16
 func.func @nvvm_mma_m16n8k16_bf16_bf16(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i32,
                              %b0 : i32, %b1 : i32,
-                              %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) {
+                              %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}] {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<bf16>, multiplicandBPtxType = #nvvm.mma_type<bf16>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)>
  %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -187,7 +187,7 @@ func.func @nvvm_mma_m16n8k16_bf16_bf16(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i3

 // CHECK-LABEL: @nvvm_mma_m8n8k16_s8_s8
 func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32,
-                             %c0 : i32, %c1 : i32) {
+                             %c0 : i32, %c1 : i32) -> !llvm.struct<(i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow<wrapped>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<s8>, multiplicandBPtxType = #nvvm.mma_type<s8>, shape = #nvvm.shape<m = 8, n = 8, k = 16>} : (i32, i32, i32) -> !llvm.struct<(i32, i32)>
  %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -200,7 +200,7 @@ func.func @nvvm_mma_m8n8k16_s8_s8(%a0 : i32, %b0 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k8_f16_f16
 func.func @nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                               %b0 : vector<2xf16>,
-                               %c0 : vector<2xf16>, %c1 : vector<2xf16>) {
+                               %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> {
  // CHECK: nvvm.mma.sync A[%{{.*}}, %{{.*}}] B[%{{.*}}] C[%{{.*}}, %{{.*}}] {{{.*}}} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -212,7 +212,7 @@ func.func @nvvm_mma_m16n8k8_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
 func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                                %a2 : vector<2xf16>, %a3 : vector<2xf16>,
                                %b0 : vector<2xf16>, %b1 : vector<2xf16>,
-                                %c0 : vector<2xf16>, %c1 : vector<2xf16>) {
+                                %c0 : vector<2xf16>, %c1 : vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
  %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -224,7 +224,7 @@ func.func @nvvm_mma_m16n8k16_f16_f16(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
 func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                                %a2 : vector<2xf16>, %a3 : vector<2xf16>,
                                %b0 : vector<2xf16>, %b1 : vector<2xf16>,
-                                %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) {
+                                %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}, {{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32)>
  %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -235,7 +235,7 @@ func.func @nvvm_mma_m16n8k16_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
 // CHECK-LABEL: @nvvm_mma_m16n8k4_tf32_f32
 func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32,
                                     %b0 : i32,
-                                     %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) {
+                                     %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32) -> !llvm.struct<(f32, f32, f32, f32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<tf32>, multiplicandBPtxType = #nvvm.mma_type<tf32>, shape = #nvvm.shape<m = 16, n = 8, k = 4>} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -246,7 +246,7 @@ func.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32,

 // CHECK-LABEL: @nvvm_mma_m16n8k16_s8_s8
 func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32,
-                              %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                              %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow<wrapped>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<s8>, multiplicandBPtxType = #nvvm.mma_type<s8>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -259,7 +259,7 @@ func.func @nvvm_mma_m16n8k16_s8_s8(%a0 : i32, %a1 : i32, %b0 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k16_s8_u8
 func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32,
                                %b0 : i32,
-                                %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                                %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow<satfinite>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<s8>, multiplicandBPtxType = #nvvm.mma_type<u8>, shape = #nvvm.shape<m = 16, n = 8, k = 16>} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -272,7 +272,7 @@ func.func @nvvm_mma_m16n8k16_s8_u8(%a0 : i32, %a1 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k256_b1_b1
 func.func @nvvm_mma_m16n8k256_b1_b1(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i32,
                               %b0 : i32, %b1 : i32,
-                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}, {{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op<xor_popc>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<b1>, multiplicandBPtxType = #nvvm.mma_type<b1>, shape = #nvvm.shape<m = 16, n = 8, k = 256>} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)>
  %0 = nvvm.mma.sync A[%a0, %a1, %a2, %a3] B[%b0, %b1] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -284,7 +284,7 @@ func.func @nvvm_mma_m16n8k256_b1_b1(%a0 : i32, %a1 : i32, %a2 : i32, %a3 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k128_b1_b1
 func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32,
                               %b0 : i32,
-                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op<xor_popc>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<b1>, multiplicandBPtxType = #nvvm.mma_type<b1>, shape = #nvvm.shape<m = 16, n = 8, k = 128>} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -297,7 +297,7 @@ func.func @nvvm_mma_m16n8k128_b1_b1(%a0 : i32, %a1 : i32,
 // CHECK-LABEL: @nvvm_mma_m8n8k128_b1_b1
 func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32,
                              %b0 : i32,
-                              %c0 : i32, %c1 : i32) {
+                              %c0 : i32, %c1 : i32) -> !llvm.struct<(i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}] {b1Op = #nvvm.mma_b1op<xor_popc>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<b1>, multiplicandBPtxType = #nvvm.mma_type<b1>, shape = #nvvm.shape<m = 8, n = 8, k = 128>} : (i32, i32, i32) -> !llvm.struct<(i32, i32)>
  %0 = nvvm.mma.sync A[%a0] B[%b0] C[%c0, %c1]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
@ -309,7 +309,7 @@ func.func @nvvm_mma_m8n8k128_b1_b1(%a0 : i32,
 // CHECK-LABEL: @nvvm_mma_m16n8k32_s4_s4
 func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32,
                               %b0 : i32,
-                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) {
+                               %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32, i32, i32, i32)> {
  // CHECK: nvvm.mma.sync A[{{.*}}, {{.*}}] B[{{.*}}] C[{{.*}}, {{.*}}, {{.*}}, {{.*}}] {intOverflowBehavior = #nvvm.mma_int_overflow<wrapped>, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<s4>, multiplicandBPtxType = #nvvm.mma_type<s4>, shape = #nvvm.shape<m = 16, n = 8, k = 32>} : (i32, i32, i32) -> !llvm.struct<(i32, i32, i32, i32)>
  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
    {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>,
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@ -17,7 +17,7 @@ func.func @store_number_of_indices(%v : memref<f32>) {

 // -----

-func.func @yield_parent(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
+func.func @yield_parent(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) -> memref<?xf32, affine_map<(i)[off]->(off + i)>> {
  // expected-error @+1 {{op expected parent op with LinalgOp interface}}
  linalg.yield %arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>
 }
@ -983,7 +983,7 @@ func.func @reduce_wrong_block_argument_input_type(
 func.func @reduce_wrong_block_argument_output_type(
    %input1: tensor<16x32x64xf32>,
    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
-    %init2: tensor<16x64xf64>)  -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+    %init2: tensor<16x64xf64>)  -> (tensor<16x64xf32>, tensor<16x64xf64>) {
  // expected-error @+1{{'linalg.reduce' op output element type 'f64' does not match corresponding block argument type 'f32'}}
  %reduce, %reduce2 = linalg.reduce
      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
@ -1923,7 +1923,7 @@ func.func @unpack_invalid_output_rank(%input: tensor<256x128xf32>, %output: tens

 // -----

-func.func @unpack_invalid_out_of_bound_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+func.func @unpack_invalid_out_of_bound_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> {
  // expected-error@+1 {{invalid outer_dims_perm vector}}
  %0 = linalg.unpack %output outer_dims_perm = [2, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %input : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
--- a/mlir/test/Dialect/NVGPU/invalid.mlir
+++ b/mlir/test/Dialect/NVGPU/invalid.mlir
@ -40,7 +40,7 @@ func.func @ldmatrix_trans_f32_x4(%arg0: memref<128x128xf32, 3>) ->  vector<4x1xf
 }
 // -----

-func.func @ldmatrix_trans_f32_x4(%arg0: memref<128x128xf32, 3>) ->  vector<4x1xf32> {
+func.func @ldmatrix_trans_f32_x4(%arg0: memref<128x128xf32, 3>) ->  vector<4xf32> {
  %c0  = arith.constant 0 : index
  // expected-error @+1 {{results must be 2 dimensional vector}}
  %a = nvgpu.ldmatrix %arg0[%c0, %c0] {transpose = false, numTiles = 4 : i32} : memref<128x128xf32, 3> -> vector<4xf32>
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@ -240,7 +240,7 @@ func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>)

 #SparseVector = #sparse_tensor.encoding<{map = (d0) -> (d0 : compressed)}>

-func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>) -> i64 {
+func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#SparseVector>) -> index {
  // expected-error@+1 {{requested slice data on non-slice tensor}}
  %0 = sparse_tensor.storage_specifier.get %arg0 dim_offset at 0
       : !sparse_tensor.storage_specifier<#SparseVector>
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@ -1030,7 +1030,7 @@ func.func @test_conv2d_rank0_zp(%arg0: tensor<1x29x29x4xi8>, %arg1: tensor<16x3x
 // -----

 // CHECK-LABEL: test_negate_same_element_type
-func.func @test_negate_same_element_type(%arg0: tensor<8x8xf32>, %arg1: tensor<1xf32>, %arg2: tensor<1xf32>) -> tensor<8x8xf32> {
+func.func @test_negate_same_element_type(%arg0: tensor<8x8xf32>, %arg1: tensor<1xf32>, %arg2: tensor<1xf32>) -> tensor<8x8xi32> {
  // expected-error@+1 {{'tosa.negate' op expect input and output to have same element type, got 'f32' and 'i32'}}
  %0 = tosa.negate %arg0, %arg1, %arg2 : (tensor<8x8xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<8x8xi32>
  return %0 : tensor<8x8xi32>
@ -1039,7 +1039,7 @@ func.func @test_negate_same_element_type(%arg0: tensor<8x8xf32>, %arg1: tensor<1
 // -----

 // CHECK-LABEL: test_negate_same_shape
-func.func @test_negate_same_shape(%arg0: tensor<8x8xf32>, %arg1: tensor<1xf32>, %arg2: tensor<1xf32>) -> tensor<8x8xf32> {
+func.func @test_negate_same_shape(%arg0: tensor<8x8xf32>, %arg1: tensor<1xf32>, %arg2: tensor<1xf32>) -> tensor<8x6xf32> {
  // expected-error@+1 {{'tosa.negate' op requires the same shape for input1 and output}}
  %0 = tosa.negate %arg0, %arg1, %arg2 : (tensor<8x8xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<8x6xf32>
  return %0 : tensor<8x6xf32>
@ -1463,7 +1463,7 @@ func.func @test_rfft2d_width_input_output_match(%arg0: tensor<1x4x8xf16>) -> (te

 // -----

-func.func @test_argmax_invalid_output_shape(%arg0: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> {
+func.func @test_argmax_invalid_output_shape(%arg0: tensor<1x2x3xf32>) -> tensor<1x2x3xi32> {
  // expected-error@+1 {{'tosa.argmax' op expected output shape '2, 3', got '1, 2, 3'}}
  %0 = tosa.argmax %arg0 {axis = 0 : i32}: (tensor<1x2x3xf32>) -> tensor<1x2x3xi32>
  return %0 : tensor<1x2x3xi32>
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@ -969,7 +969,7 @@ func.func @contraction(%arg0: vector<4x3xi32>,
  iterator_types = ["parallel", "parallel", "reduction"]
 }
 func.func @contraction(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vector<2x3xf32>)
-> vector<3x2xf32>
+-> vector<2x3xf32>
 {
 // expected-error@+1 {{invalid accumulator/result vector shape, expected: 'vector<3x2xf32>'}}
  %0 = vector.contract #contraction_trait %arg0, %arg1, %arg2
--- a/mlir/test/Transforms/print-op-graph.mlir
+++ b/mlir/test/Transforms/print-op-graph.mlir
@ -50,7 +50,7 @@
 //       CFG:   v[[TEST_FUNC]] -> v[[ANCHOR]][lhead = [[CLUSTER_MERGE_BLOCKS]], style = dashed];
 //       CFG:   v[[ANCHOR]] -> v[[TEST_RET]][ltail = [[CLUSTER_MERGE_BLOCKS]], style = dashed];

-func.func @merge_blocks(%arg0: i32, %arg1 : i32) -> () {
+func.func @merge_blocks(%arg0: i32, %arg1 : i32) -> (i32, i32) {
  %0 = arith.constant dense<[[0, 1], [2, 3]]> : tensor<2x2xi32>
  %1 = arith.constant dense<1> : tensor<5xi32>
  %2 = arith.constant dense<[[0, 1]]> : tensor<1x2xi32>
--- a/mlir/test/Transforms/test-dialect-conversion-pdll.mlir
+++ b/mlir/test/Transforms/test-dialect-conversion-pdll.mlir
@ -2,10 +2,10 @@

 // CHECK-LABEL: @TestSingleConversion
 func.func @TestSingleConversion() {
-  // CHECK: %[[CAST:.*]] = "test.cast"() : () -> f64
-  // CHECK-NEXT: "test.return"(%[[CAST]]) : (f64) -> ()
+  // CHECK: "test.cast"() : () -> f64
+  // CHECK-NEXT: "test.return"() : () -> ()
  %result = "test.cast"() : () -> (i64)
-  "test.return"(%result) : (i64) -> ()
+  "test.return"() : () -> ()
 }

 // CHECK-LABEL: @TestLingeringConversion
--- a/mlir/test/Transforms/test-legalizer.mlir
+++ b/mlir/test/Transforms/test-legalizer.mlir
@ -107,8 +107,10 @@ func.func @remap_input_1_to_N_remaining_use(%arg0: f32) {
 // CHECK-LABEL: func @remap_materialize_1_to_1(%{{.*}}: i43)
 func.func @remap_materialize_1_to_1(%arg0: i42) {
  // CHECK: %[[V:.*]] = "test.cast"(%arg0) : (i43) -> i42
-  // CHECK: "test.return"(%[[V]])
-  "test.return"(%arg0) : (i42) -> ()
+  // CHECK-NEXT: "work"(%[[V]])
+  // expected-remark@+1 {{op 'work' is not legalizable}}
+  "work"(%arg0) : (i42) -> ()
+  "test.return"() : () -> ()
 }

 // -----
@ -251,7 +253,7 @@ func.func @replace_block_arg_1_to_n() {
 // -----

 // CHECK-LABEL: @replace_op_result_1_to_n
-func.func @replace_op_result_1_to_n() {
+func.func @replace_op_result_1_to_n() -> i32 {
  // CHECK: %[[orig:.*]] = "test.legal_op"() : () -> i32
  // CHECK: %[[repl:.*]] = "test.legal_op"() : () -> i16
  %0 = "test.legal_op"() : () -> i32
--- a/mlir/test/Transforms/test-merge-blocks.mlir
+++ b/mlir/test/Transforms/test-merge-blocks.mlir
@ -1,7 +1,7 @@
 // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-merge-blocks -verify-diagnostics %s | FileCheck %s

 // CHECK-LABEL: @merge_blocks
-func.func @merge_blocks(%arg0: i32, %arg1 : i32) -> () {
+func.func @merge_blocks(%arg0: i32, %arg1 : i32) -> (i32, i32) {
  //      CHECK: "test.merge_blocks"() ({
  // CHECK-NEXT:   "test.return"
  // CHECK-NEXT: })
--- a/mlir/test/Transforms/test-pattern-selective-replacement.mlir
+++ b/mlir/test/Transforms/test-pattern-selective-replacement.mlir
@ -4,7 +4,7 @@

 // CHECK-LABEL: @test1
 // CHECK-SAME: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32
-func.func @test1(%arg0: i32, %arg1 : i32) -> () {
+func.func @test1(%arg0: i32, %arg1 : i32) -> (i32, i32) {
  // CHECK: arith.addi %[[ARG1]], %[[ARG1]]
  // CHECK-NEXT: "test.return"(%[[ARG0]]
  %cast = "test.cast"(%arg0, %arg1) : (i32, i32) -> (i32)