From 7c13f88ecc0547a045a3726593c3bf716c182f07 Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub@nod-labs.com>
Date: Sun, 8 Mar 2026 21:00:02 -0400
Subject: [PATCH] [mlir][NVGPU] Fix double spaces in tests after ODS printer
 fix. NFC. (#185327)

Follow-up to #184253. Update tests that checked for the old double-space
output of GPU and NVVM ops using GPU_DimensionAttr and
SetMaxRegisterActionAttr.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir |  2 +-
 mlir/test/Dialect/NVGPU/canonicalization.mlir |  4 ++--
 .../test/Dialect/NVGPU/tmaload-transform.mlir |  2 +-
 .../NVGPU/transform-matmul-to-nvvm.mlir       |  2 +-
 mlir/test/Examples/NVGPU/Ch0.py               |  2 +-
 mlir/test/Examples/NVGPU/Ch3.py               |  2 +-
 mlir/test/Examples/NVGPU/Ch4.py               | 24 +++++++++----------
 mlir/test/Examples/NVGPU/Ch5.py               | 22 ++++++++---------
 8 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 0eb44789fe31..50bea5a85022 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -631,7 +631,7 @@ func.func @mbarrier_txcount_pred() {
     %mine = arith.constant 1 : index
     // CHECK: %[[c0:.+]] = arith.constant 0 : index
     // CHECK: %[[mid:.+]] = builtin.unrealized_conversion_cast %[[c0]] : index to i64
-    // CHECK: %[[S2:.+]] = gpu.thread_id  x
+    // CHECK: %[[S2:.+]] = gpu.thread_id x
     // CHECK: %[[P:.+]] = arith.cmpi eq, %[[S2]], %[[c0]] : index
     %c0 = arith.constant 0 : index
     %tidx = gpu.thread_id x
diff --git a/mlir/test/Dialect/NVGPU/canonicalization.mlir b/mlir/test/Dialect/NVGPU/canonicalization.mlir
index a7fbfd806739..7f33a79cb102 100644
--- a/mlir/test/Dialect/NVGPU/canonicalization.mlir
+++ b/mlir/test/Dialect/NVGPU/canonicalization.mlir
@@ -12,13 +12,13 @@ gpu.module @main_kernel {
         } 
   {
     // CHECK: %[[c0:.+]] = arith.constant 0 : index 
-    // CHECK: %[[S0:.+]] = gpu.thread_id  x
+    // CHECK: %[[S0:.+]] = gpu.thread_id x
     // CHECK: %[[S1:.+]] = arith.cmpi eq, %[[S0]], %[[c0]] : index
     // CHECK: %[[S2:.+]] = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
     // CHECK: %[[S3:.+]] = memref.view %[[S2]][%[[c0]]][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>
     // CHECK: nvgpu.tma.async.store %[[S3]] to %[[arg0]][%[[c0]], %[[c0]]], predicate = %[[S1]] : memref<128x32xf32, #gpu.address_space<workgroup>> -> <tensor = memref<128x32xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>
     %c0 = arith.constant 0 : index
-    %0 = gpu.thread_id  x
+    %0 = gpu.thread_id x
     %1 = arith.cmpi eq, %0, %c0 : index
     %2 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
     %view = memref.view %2[%c0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>
diff --git a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
index 40acd82cd055..901f7732797d 100644
--- a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
+++ b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
@@ -38,7 +38,7 @@ func.func @main() {
     //      CHECK: gpu.barrier
     //
     //      CHECK: %[[c0:.*]] = arith.constant 0 : index
-    //      CHECK: %[[TIDX:.*]] = gpu.thread_id  x
+    //      CHECK: %[[TIDX:.*]] = gpu.thread_id x
     //      CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[TIDX]], %[[c0]] : index
     //
     //      CHECK: scf.if %[[CMP]] {
diff --git a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
index bbe27fe1b99d..c6aa2039a951 100644
--- a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
+++ b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
@@ -13,7 +13,7 @@ func.func @matmul_16x8x4xf32_global(
 // CHECK-SAME:                                        %[[VAL_1:.*]]: memref<4x8xf32>,
 // CHECK-SAME:                                        %[[VAL_2:.*]]: memref<16x8xf32>) {
 
-// CHECK:           %[[TIDX:.*]] = gpu.thread_id  x
+// CHECK:           %[[TIDX:.*]] = gpu.thread_id x
 // CHECK:           %[[VAL_4:.*]] = affine.apply #[[$div4]]()[%[[TIDX]]]
 // CHECK:           %[[VAL_5:.*]] = affine.apply #[[$mod4]]()[%[[TIDX]]]
 // CHECK:           %[[VAL_6:.*]] = memref.load %[[VAL_0]][%[[VAL_4]], %[[VAL_5]]] : memref<16x4xf32>
diff --git a/mlir/test/Examples/NVGPU/Ch0.py b/mlir/test/Examples/NVGPU/Ch0.py
index e09720a0f3b7..4f1743bb5f17 100644
--- a/mlir/test/Examples/NVGPU/Ch0.py
+++ b/mlir/test/Examples/NVGPU/Ch0.py
@@ -61,7 +61,7 @@ main(alpha)
 # DUMPIR:     %[[C1_2:.*]] = arith.constant 1 : index
 # DUMPIR:     %[[C1_3:.*]] = arith.constant 1 : index
 # DUMPIR:     gpu.launch blocks(%arg1, %arg2, %arg3) in (%arg7 = %[[C1]], %arg8 = %[[C1_0]], %arg9 = %[[C1_1]]) threads(%arg4, %arg5, %arg6) in (%arg10 = %[[C4]], %arg11 = %[[C1_2]], %arg12 = %[[C1_3]]) dynamic_shared_memory_size %[[C0_I32]] {
-# DUMPIR:       %[[TIDX:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[TIDX:.*]] = gpu.thread_id x
 # DUMPIR:       %[[MYVAL:.*]] = arith.addi %arg0, %[[TIDX]] : index
 # DUMPIR:       gpu.printf "GPU thread %llu has %llu\0A", %[[TIDX]], %[[MYVAL]] : index, index
 # DUMPIR:       gpu.terminator
diff --git a/mlir/test/Examples/NVGPU/Ch3.py b/mlir/test/Examples/NVGPU/Ch3.py
index fe1157541686..815e16dc6855 100644
--- a/mlir/test/Examples/NVGPU/Ch3.py
+++ b/mlir/test/Examples/NVGPU/Ch3.py
@@ -143,7 +143,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:     %[[C64_5:.*]] = arith.constant 64 : index
 # DUMPIR:     %[[C64_6:.*]] = arith.constant 64 : index
 # DUMPIR:     %[[TMA1:.*]] = nvgpu.tma.create.descriptor %[[CAST1]] box[%[[C64_5]], %[[C64_6]]] : memref<*xf16> -> <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
-# DUMPIR:       %[[THREADID:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[THREADID:.*]] = gpu.thread_id x
 # DUMPIR:       %[[MB:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>>
 # DUMPIR:       %[[C0:.*]] = arith.constant 0 : index
 # DUMPIR:       %[[EQ:.*]] = arith.cmpi eq, %[[THREADID]], %[[C0]] : index
diff --git a/mlir/test/Examples/NVGPU/Ch4.py b/mlir/test/Examples/NVGPU/Ch4.py
index dffafda7f21c..c66259d14133 100644
--- a/mlir/test/Examples/NVGPU/Ch4.py
+++ b/mlir/test/Examples/NVGPU/Ch4.py
@@ -342,14 +342,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:       %[[C6:.*]] = arith.constant 6 : index
 # DUMPIR:       %[[C1_PROLOGUE:.*]] = arith.constant 1 : index
 # DUMPIR:       scf.for %arg15 = %[[C0_PROLOGUE]] to %[[C6]] step %[[C1_PROLOGUE]] {
-# DUMPIR:         %[[BID_X_P:.*]] = gpu.block_id  x
-# DUMPIR:         %[[BID_Y_P:.*]] = gpu.block_id  y
+# DUMPIR:         %[[BID_X_P:.*]] = gpu.block_id x
+# DUMPIR:         %[[BID_Y_P:.*]] = gpu.block_id y
 # DUMPIR:         %[[C128_P1:.*]] = arith.constant 128 : index
 # DUMPIR:         %[[DIMX_P:.*]] = arith.muli %[[BID_X_P]], %[[C128_P1]] : index
 # DUMPIR:         %[[C128_P2:.*]] = arith.constant 128 : index
 # DUMPIR:         %[[DIMY_P:.*]] = arith.muli %[[BID_Y_P]], %[[C128_P2]] : index
-# DUMPIR:         %{{.*}} = gpu.thread_id  x
-# DUMPIR:         %[[TID_X_P:.*]] = gpu.thread_id  x
+# DUMPIR:         %{{.*}} = gpu.thread_id x
+# DUMPIR:         %[[TID_X_P:.*]] = gpu.thread_id x
 # DUMPIR:         %[[C0_P:.*]] = arith.constant 0 : index
 # DUMPIR:         %[[PRED_P:.*]] = arith.cmpi eq, %[[TID_X_P]], %[[C0_P]] : index
 # DUMPIR:         %[[C16384_P1:.*]] = arith.constant 16384 : index
@@ -376,7 +376,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:         %[[DIMY_P_OFF:.*]] = arith.addi %[[DIMY_P]], %[[C64_OFF]] : index
 # DUMPIR:         nvgpu.tma.async.load %{{.*}}[%[[DIMY_P_OFF]], %[[K_COORD_P]]], %{{.*}}[%arg15] to %[[VIEW_B2_P]], predicate = %[[PRED_P]] : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7> -> memref<64x64xf16, #gpu.address_space<workgroup>>
 # DUMPIR:       }
-# DUMPIR:       %[[TID_X_LOOP:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[TID_X_LOOP:.*]] = gpu.thread_id x
 # DUMPIR:       %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
 # DUMPIR:       %[[FALSE_LOOP:.*]] = arith.constant false
 # DUMPIR:       %[[C0_LOOP:.*]] = arith.constant 0 : index
@@ -409,14 +409,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:         %[[STAGE_NEXT_L:.*]] = arith.addi %arg15, %[[C6_STAGE]] : index
 # DUMPIR:         %[[C7_MOD:.*]] = arith.constant 7 : index
 # DUMPIR:         %[[STAGE_LOAD:.*]] = arith.remui %[[STAGE_NEXT_L]], %[[C7_MOD]] : index
-# DUMPIR:         %[[BID_X_L:.*]] = gpu.block_id  x
-# DUMPIR:         %[[BID_Y_L:.*]] = gpu.block_id  y
+# DUMPIR:         %[[BID_X_L:.*]] = gpu.block_id x
+# DUMPIR:         %[[BID_Y_L:.*]] = gpu.block_id y
 # DUMPIR:         %[[C128_L1:.*]] = arith.constant 128 : index
 # DUMPIR:         %[[DIMX_L:.*]] = arith.muli %[[BID_X_L]], %[[C128_L1]] : index
 # DUMPIR:         %[[C128_L2:.*]] = arith.constant 128 : index
 # DUMPIR:         %[[DIMY_L:.*]] = arith.muli %[[BID_Y_L]], %[[C128_L2]] : index
-# DUMPIR:         %[[TID_X_L1:.*]] = gpu.thread_id  x
-# DUMPIR:         %[[TID_X_L2:.*]] = gpu.thread_id  x
+# DUMPIR:         %[[TID_X_L1:.*]] = gpu.thread_id x
+# DUMPIR:         %[[TID_X_L2:.*]] = gpu.thread_id x
 # DUMPIR:         %[[C16384_LA1:.*]] = arith.constant 16384 : index
 # DUMPIR:         %[[OFF_A_LOAD:.*]] = arith.muli %[[STAGE_LOAD]], %[[C16384_LA1]] : index
 # DUMPIR:         %[[C16384_LA2:.*]] = arith.constant 16384 : index
@@ -448,9 +448,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:         scf.yield %[[ACC_L]], %[[NEW_PARITY]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
 # DUMPIR:       }
 # DUMPIR:       nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR:       %[[TID_X_EPI:.*]] = gpu.thread_id  x
-# DUMPIR:       %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR:       %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR:       %[[TID_X_EPI:.*]] = gpu.thread_id x
+# DUMPIR:       %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR:       %[[BID_Y_EPI:.*]] = gpu.block_id y
 # DUMPIR:       %[[C128_EPI1:.*]] = arith.constant 128 : index
 # DUMPIR:       %[[DIMX_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI1]] : index
 # DUMPIR:       %[[C128_EPI2:.*]] = arith.constant 128 : index
diff --git a/mlir/test/Examples/NVGPU/Ch5.py b/mlir/test/Examples/NVGPU/Ch5.py
index b725e50d8f44..4f06f9714262 100644
--- a/mlir/test/Examples/NVGPU/Ch5.py
+++ b/mlir/test/Examples/NVGPU/Ch5.py
@@ -324,7 +324,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # CHECK-NOT: Mismatched elements
 # CHECK: PASS
 
-# DUMPIR:       %[[TID_X:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[TID_X:.*]] = gpu.thread_id x
 # DUMPIR:       %[[C128:.*]] = arith.constant 128 : index
 # DUMPIR:       %[[REM1:.*]] = arith.remui %[[TID_X]], %[[C128]] : index
 # DUMPIR:       %[[C0:.*]] = arith.constant 0 : index
@@ -333,7 +333,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:       %[[DIV1:.*]] = arith.divui %[[TID_X]], %[[C128_1]] : index
 # DUMPIR:       %[[C1:.*]] = arith.constant 1 : index
 # DUMPIR:       %[[IS_PRODUCER:.*]] = arith.cmpi eq, %[[DIV1]], %[[C1]] : index
-# DUMPIR:       %[[TID_X_2:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[TID_X_2:.*]] = gpu.thread_id x
 # DUMPIR:       %[[C128_2:.*]] = arith.constant 128 : index
 # DUMPIR:       %[[REM2:.*]] = arith.remui %[[TID_X_2]], %[[C128_2]] : index
 # DUMPIR:       %[[C0_2:.*]] = arith.constant 0 : index
@@ -342,7 +342,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:       %[[DIV2:.*]] = arith.divui %[[TID_X_2]], %[[C128_3]] : index
 # DUMPIR:       %[[C0_3:.*]] = arith.constant 0 : index
 # DUMPIR:       %[[IS_CONSUMER:.*]] = arith.cmpi eq, %[[DIV2]], %[[C0_3]] : index
-# DUMPIR:       %[[TID_X_3:.*]] = gpu.thread_id  x
+# DUMPIR:       %[[TID_X_3:.*]] = gpu.thread_id x
 # DUMPIR:       %[[MBAR_MMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
 # DUMPIR:       %[[MBAR_TMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
 # DUMPIR:       %[[C0_4:.*]] = arith.constant 0 : index
@@ -361,7 +361,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:         nvgpu.tma.prefetch.descriptor %{{.*}} : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
 # DUMPIR:       }
 # DUMPIR:       scf.if %[[IS_PRODUCER]] {
-# DUMPIR:         nvvm.setmaxregister  decrease 40
+# DUMPIR:         nvvm.setmaxregister decrease 40
 # DUMPIR:         %[[TRUE:.*]] = arith.constant true
 # DUMPIR:         %[[C0_PROD:.*]] = arith.constant 0 : index
 # DUMPIR:         %[[C16:.*]] = arith.constant 16 : index
@@ -376,13 +376,13 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:           %[[TRUE_2:.*]] = arith.constant true
 # DUMPIR:           %[[FLIP:.*]] = arith.xori %arg16, %[[TRUE_2]] : i1
 # DUMPIR:           %[[PHASE:.*]] = arith.select %[[IS_LAST]], %[[FLIP]], %arg16 : i1
-# DUMPIR:           %[[BID_X:.*]] = gpu.block_id  x
-# DUMPIR:           %[[BID_Y:.*]] = gpu.block_id  y
+# DUMPIR:           %[[BID_X:.*]] = gpu.block_id x
+# DUMPIR:           %[[BID_Y:.*]] = gpu.block_id y
 # DUMPIR:           %[[C128_TILE:.*]] = arith.constant 128 : index
 # DUMPIR:           %[[DIM_X:.*]] = arith.muli %[[BID_X]], %[[C128_TILE]] : index
 # DUMPIR:           %[[C128_TILE_2:.*]] = arith.constant 128 : index
 # DUMPIR:           %[[DIM_Y:.*]] = arith.muli %[[BID_Y]], %[[C128_TILE_2]] : index
-# DUMPIR:           %[[TID_PROD:.*]] = gpu.thread_id  x
+# DUMPIR:           %[[TID_PROD:.*]] = gpu.thread_id x
 # DUMPIR:           %[[C16384:.*]] = arith.constant 16384 : index
 # DUMPIR:           %[[OFF_A:.*]] = arith.muli %[[SLOT]], %[[C16384]] : index
 # DUMPIR:           %[[C16384_2:.*]] = arith.constant 16384 : index
@@ -414,7 +414,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:         }
 # DUMPIR:       }
 # DUMPIR:       scf.if %[[IS_CONSUMER]] {
-# DUMPIR:         nvvm.setmaxregister  increase 232
+# DUMPIR:         nvvm.setmaxregister increase 232
 # DUMPIR:         %[[FALSE:.*]] = arith.constant false
 # DUMPIR:         %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
 # DUMPIR:         %[[C0_CONS:.*]] = arith.constant 0 : index
@@ -456,9 +456,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
 # DUMPIR:           scf.yield %[[ACC]], %[[PHASE_CONS]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
 # DUMPIR:         }
 # DUMPIR:         nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR:         %[[TID_EPI:.*]] = gpu.thread_id  x
-# DUMPIR:         %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR:         %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR:         %[[TID_EPI:.*]] = gpu.thread_id x
+# DUMPIR:         %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR:         %[[BID_Y_EPI:.*]] = gpu.block_id y
 # DUMPIR:         %[[C128_EPI:.*]] = arith.constant 128 : index
 # DUMPIR:         %[[DIM_X_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI]] : index
 # DUMPIR:         %[[C128_EPI_2:.*]] = arith.constant 128 : index