From 7c13f88ecc0547a045a3726593c3bf716c182f07 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Sun, 8 Mar 2026 21:00:02 -0400 Subject: [PATCH] [mlir][NVGPU] Fix double spaces in tests after ODS printer fix. NFC. (#185327) Follow-up to #184253. Update tests that checked for the old double-space output of GPU and NVVM ops using GPU_DimensionAttr and SetMaxRegisterActionAttr. Co-authored-by: Claude Opus 4.6 --- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 2 +- mlir/test/Dialect/NVGPU/canonicalization.mlir | 4 ++-- .../test/Dialect/NVGPU/tmaload-transform.mlir | 2 +- .../NVGPU/transform-matmul-to-nvvm.mlir | 2 +- mlir/test/Examples/NVGPU/Ch0.py | 2 +- mlir/test/Examples/NVGPU/Ch3.py | 2 +- mlir/test/Examples/NVGPU/Ch4.py | 24 +++++++++---------- mlir/test/Examples/NVGPU/Ch5.py | 22 ++++++++--------- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 0eb44789fe31..50bea5a85022 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -631,7 +631,7 @@ func.func @mbarrier_txcount_pred() { %mine = arith.constant 1 : index // CHECK: %[[c0:.+]] = arith.constant 0 : index // CHECK: %[[mid:.+]] = builtin.unrealized_conversion_cast %[[c0]] : index to i64 - // CHECK: %[[S2:.+]] = gpu.thread_id x + // CHECK: %[[S2:.+]] = gpu.thread_id x // CHECK: %[[P:.+]] = arith.cmpi eq, %[[S2]], %[[c0]] : index %c0 = arith.constant 0 : index %tidx = gpu.thread_id x diff --git a/mlir/test/Dialect/NVGPU/canonicalization.mlir b/mlir/test/Dialect/NVGPU/canonicalization.mlir index a7fbfd806739..7f33a79cb102 100644 --- a/mlir/test/Dialect/NVGPU/canonicalization.mlir +++ b/mlir/test/Dialect/NVGPU/canonicalization.mlir @@ -12,13 +12,13 @@ gpu.module @main_kernel { } { // CHECK: %[[c0:.+]] = arith.constant 0 : index - // CHECK: %[[S0:.+]] = gpu.thread_id x + // CHECK: %[[S0:.+]] = gpu.thread_id x // CHECK: %[[S1:.+]] = arith.cmpi eq, %[[S0]], %[[c0]] : index // CHECK: %[[S2:.+]] = gpu.dynamic_shared_memory : memref> // CHECK: %[[S3:.+]] = memref.view %[[S2]][%[[c0]]][] : memref> to memref<128x32xf32, #gpu.address_space> // CHECK: nvgpu.tma.async.store %[[S3]] to %[[arg0]][%[[c0]], %[[c0]]], predicate = %[[S1]] : memref<128x32xf32, #gpu.address_space> -> , swizzle = none, l2promo = none, oob = zero, interleave = none> %c0 = arith.constant 0 : index - %0 = gpu.thread_id x + %0 = gpu.thread_id x %1 = arith.cmpi eq, %0, %c0 : index %2 = gpu.dynamic_shared_memory : memref> %view = memref.view %2[%c0][] : memref> to memref<128x32xf32, #gpu.address_space> diff --git a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir index 40acd82cd055..901f7732797d 100644 --- a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir +++ b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir @@ -38,7 +38,7 @@ func.func @main() { // CHECK: gpu.barrier // // CHECK: %[[c0:.*]] = arith.constant 0 : index - // CHECK: %[[TIDX:.*]] = gpu.thread_id x + // CHECK: %[[TIDX:.*]] = gpu.thread_id x // CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[TIDX]], %[[c0]] : index // // CHECK: scf.if %[[CMP]] { diff --git a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir index bbe27fe1b99d..c6aa2039a951 100644 --- a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir +++ b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir @@ -13,7 +13,7 @@ func.func @matmul_16x8x4xf32_global( // CHECK-SAME: %[[VAL_1:.*]]: memref<4x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: memref<16x8xf32>) { -// CHECK: %[[TIDX:.*]] = gpu.thread_id x +// CHECK: %[[TIDX:.*]] = gpu.thread_id x // CHECK: %[[VAL_4:.*]] = affine.apply #[[$div4]]()[%[[TIDX]]] // CHECK: %[[VAL_5:.*]] = affine.apply #[[$mod4]]()[%[[TIDX]]] // CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_0]][%[[VAL_4]], %[[VAL_5]]] : memref<16x4xf32> diff --git a/mlir/test/Examples/NVGPU/Ch0.py b/mlir/test/Examples/NVGPU/Ch0.py index e09720a0f3b7..4f1743bb5f17 100644 --- a/mlir/test/Examples/NVGPU/Ch0.py +++ b/mlir/test/Examples/NVGPU/Ch0.py @@ -61,7 +61,7 @@ main(alpha) # DUMPIR: %[[C1_2:.*]] = arith.constant 1 : index # DUMPIR: %[[C1_3:.*]] = arith.constant 1 : index # DUMPIR: gpu.launch blocks(%arg1, %arg2, %arg3) in (%arg7 = %[[C1]], %arg8 = %[[C1_0]], %arg9 = %[[C1_1]]) threads(%arg4, %arg5, %arg6) in (%arg10 = %[[C4]], %arg11 = %[[C1_2]], %arg12 = %[[C1_3]]) dynamic_shared_memory_size %[[C0_I32]] { -# DUMPIR: %[[TIDX:.*]] = gpu.thread_id x +# DUMPIR: %[[TIDX:.*]] = gpu.thread_id x # DUMPIR: %[[MYVAL:.*]] = arith.addi %arg0, %[[TIDX]] : index # DUMPIR: gpu.printf "GPU thread %llu has %llu\0A", %[[TIDX]], %[[MYVAL]] : index, index # DUMPIR: gpu.terminator diff --git a/mlir/test/Examples/NVGPU/Ch3.py b/mlir/test/Examples/NVGPU/Ch3.py index fe1157541686..815e16dc6855 100644 --- a/mlir/test/Examples/NVGPU/Ch3.py +++ b/mlir/test/Examples/NVGPU/Ch3.py @@ -143,7 +143,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[C64_5:.*]] = arith.constant 64 : index # DUMPIR: %[[C64_6:.*]] = arith.constant 64 : index # DUMPIR: %[[TMA1:.*]] = nvgpu.tma.create.descriptor %[[CAST1]] box[%[[C64_5]], %[[C64_6]]] : memref<*xf16> -> , swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none> -# DUMPIR: %[[THREADID:.*]] = gpu.thread_id x +# DUMPIR: %[[THREADID:.*]] = gpu.thread_id x # DUMPIR: %[[MB:.*]] = nvgpu.mbarrier.create -> > # DUMPIR: %[[C0:.*]] = arith.constant 0 : index # DUMPIR: %[[EQ:.*]] = arith.cmpi eq, %[[THREADID]], %[[C0]] : index diff --git a/mlir/test/Examples/NVGPU/Ch4.py b/mlir/test/Examples/NVGPU/Ch4.py index dffafda7f21c..c66259d14133 100644 --- a/mlir/test/Examples/NVGPU/Ch4.py +++ b/mlir/test/Examples/NVGPU/Ch4.py @@ -342,14 +342,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[C6:.*]] = arith.constant 6 : index # DUMPIR: %[[C1_PROLOGUE:.*]] = arith.constant 1 : index # DUMPIR: scf.for %arg15 = %[[C0_PROLOGUE]] to %[[C6]] step %[[C1_PROLOGUE]] { -# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id x -# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id y +# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id x +# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id y # DUMPIR: %[[C128_P1:.*]] = arith.constant 128 : index # DUMPIR: %[[DIMX_P:.*]] = arith.muli %[[BID_X_P]], %[[C128_P1]] : index # DUMPIR: %[[C128_P2:.*]] = arith.constant 128 : index # DUMPIR: %[[DIMY_P:.*]] = arith.muli %[[BID_Y_P]], %[[C128_P2]] : index -# DUMPIR: %{{.*}} = gpu.thread_id x -# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id x +# DUMPIR: %{{.*}} = gpu.thread_id x +# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id x # DUMPIR: %[[C0_P:.*]] = arith.constant 0 : index # DUMPIR: %[[PRED_P:.*]] = arith.cmpi eq, %[[TID_X_P]], %[[C0_P]] : index # DUMPIR: %[[C16384_P1:.*]] = arith.constant 16384 : index @@ -376,7 +376,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[DIMY_P_OFF:.*]] = arith.addi %[[DIMY_P]], %[[C64_OFF]] : index # DUMPIR: nvgpu.tma.async.load %{{.*}}[%[[DIMY_P_OFF]], %[[K_COORD_P]]], %{{.*}}[%arg15] to %[[VIEW_B2_P]], predicate = %[[PRED_P]] : , swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>, , num_barriers = 7> -> memref<64x64xf16, #gpu.address_space> # DUMPIR: } -# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id x # DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> > # DUMPIR: %[[FALSE_LOOP:.*]] = arith.constant false # DUMPIR: %[[C0_LOOP:.*]] = arith.constant 0 : index @@ -409,14 +409,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[STAGE_NEXT_L:.*]] = arith.addi %arg15, %[[C6_STAGE]] : index # DUMPIR: %[[C7_MOD:.*]] = arith.constant 7 : index # DUMPIR: %[[STAGE_LOAD:.*]] = arith.remui %[[STAGE_NEXT_L]], %[[C7_MOD]] : index -# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id x -# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id y +# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id x +# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id y # DUMPIR: %[[C128_L1:.*]] = arith.constant 128 : index # DUMPIR: %[[DIMX_L:.*]] = arith.muli %[[BID_X_L]], %[[C128_L1]] : index # DUMPIR: %[[C128_L2:.*]] = arith.constant 128 : index # DUMPIR: %[[DIMY_L:.*]] = arith.muli %[[BID_Y_L]], %[[C128_L2]] : index -# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id x -# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id x # DUMPIR: %[[C16384_LA1:.*]] = arith.constant 16384 : index # DUMPIR: %[[OFF_A_LOAD:.*]] = arith.muli %[[STAGE_LOAD]], %[[C16384_LA1]] : index # DUMPIR: %[[C16384_LA2:.*]] = arith.constant 16384 : index @@ -448,9 +448,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: scf.yield %[[ACC_L]], %[[NEW_PARITY]] : !nvgpu.warpgroup.accumulator>, i1 # DUMPIR: } # DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0 -# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id x -# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x -# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y +# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id x +# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x +# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y # DUMPIR: %[[C128_EPI1:.*]] = arith.constant 128 : index # DUMPIR: %[[DIMX_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI1]] : index # DUMPIR: %[[C128_EPI2:.*]] = arith.constant 128 : index diff --git a/mlir/test/Examples/NVGPU/Ch5.py b/mlir/test/Examples/NVGPU/Ch5.py index b725e50d8f44..4f06f9714262 100644 --- a/mlir/test/Examples/NVGPU/Ch5.py +++ b/mlir/test/Examples/NVGPU/Ch5.py @@ -324,7 +324,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # CHECK-NOT: Mismatched elements # CHECK: PASS -# DUMPIR: %[[TID_X:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X:.*]] = gpu.thread_id x # DUMPIR: %[[C128:.*]] = arith.constant 128 : index # DUMPIR: %[[REM1:.*]] = arith.remui %[[TID_X]], %[[C128]] : index # DUMPIR: %[[C0:.*]] = arith.constant 0 : index @@ -333,7 +333,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[DIV1:.*]] = arith.divui %[[TID_X]], %[[C128_1]] : index # DUMPIR: %[[C1:.*]] = arith.constant 1 : index # DUMPIR: %[[IS_PRODUCER:.*]] = arith.cmpi eq, %[[DIV1]], %[[C1]] : index -# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id x # DUMPIR: %[[C128_2:.*]] = arith.constant 128 : index # DUMPIR: %[[REM2:.*]] = arith.remui %[[TID_X_2]], %[[C128_2]] : index # DUMPIR: %[[C0_2:.*]] = arith.constant 0 : index @@ -342,7 +342,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[DIV2:.*]] = arith.divui %[[TID_X_2]], %[[C128_3]] : index # DUMPIR: %[[C0_3:.*]] = arith.constant 0 : index # DUMPIR: %[[IS_CONSUMER:.*]] = arith.cmpi eq, %[[DIV2]], %[[C0_3]] : index -# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id x # DUMPIR: %[[MBAR_MMA:.*]] = nvgpu.mbarrier.create -> , num_barriers = 7> # DUMPIR: %[[MBAR_TMA:.*]] = nvgpu.mbarrier.create -> , num_barriers = 7> # DUMPIR: %[[C0_4:.*]] = arith.constant 0 : index @@ -361,7 +361,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: nvgpu.tma.prefetch.descriptor %{{.*}} : , swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none> # DUMPIR: } # DUMPIR: scf.if %[[IS_PRODUCER]] { -# DUMPIR: nvvm.setmaxregister decrease 40 +# DUMPIR: nvvm.setmaxregister decrease 40 # DUMPIR: %[[TRUE:.*]] = arith.constant true # DUMPIR: %[[C0_PROD:.*]] = arith.constant 0 : index # DUMPIR: %[[C16:.*]] = arith.constant 16 : index @@ -376,13 +376,13 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: %[[TRUE_2:.*]] = arith.constant true # DUMPIR: %[[FLIP:.*]] = arith.xori %arg16, %[[TRUE_2]] : i1 # DUMPIR: %[[PHASE:.*]] = arith.select %[[IS_LAST]], %[[FLIP]], %arg16 : i1 -# DUMPIR: %[[BID_X:.*]] = gpu.block_id x -# DUMPIR: %[[BID_Y:.*]] = gpu.block_id y +# DUMPIR: %[[BID_X:.*]] = gpu.block_id x +# DUMPIR: %[[BID_Y:.*]] = gpu.block_id y # DUMPIR: %[[C128_TILE:.*]] = arith.constant 128 : index # DUMPIR: %[[DIM_X:.*]] = arith.muli %[[BID_X]], %[[C128_TILE]] : index # DUMPIR: %[[C128_TILE_2:.*]] = arith.constant 128 : index # DUMPIR: %[[DIM_Y:.*]] = arith.muli %[[BID_Y]], %[[C128_TILE_2]] : index -# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id x +# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id x # DUMPIR: %[[C16384:.*]] = arith.constant 16384 : index # DUMPIR: %[[OFF_A:.*]] = arith.muli %[[SLOT]], %[[C16384]] : index # DUMPIR: %[[C16384_2:.*]] = arith.constant 16384 : index @@ -414,7 +414,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: } # DUMPIR: } # DUMPIR: scf.if %[[IS_CONSUMER]] { -# DUMPIR: nvvm.setmaxregister increase 232 +# DUMPIR: nvvm.setmaxregister increase 232 # DUMPIR: %[[FALSE:.*]] = arith.constant false # DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> > # DUMPIR: %[[C0_CONS:.*]] = arith.constant 0 : index @@ -456,9 +456,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1": # DUMPIR: scf.yield %[[ACC]], %[[PHASE_CONS]] : !nvgpu.warpgroup.accumulator>, i1 # DUMPIR: } # DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0 -# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id x -# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x -# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y +# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id x +# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x +# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y # DUMPIR: %[[C128_EPI:.*]] = arith.constant 128 : index # DUMPIR: %[[DIM_X_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI]] : index # DUMPIR: %[[C128_EPI_2:.*]] = arith.constant 128 : index