[mlir][NVGPU] Fix double spaces in tests after ODS printer fix. NFC. (#185327)

Follow-up to #184253. Update tests that checked for the old double-space
output of GPU and NVVM ops using GPU_DimensionAttr and
SetMaxRegisterActionAttr.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jakub Kuderski 2026-03-08 21:00:02 -04:00 committed by GitHub
parent ade6309229
commit 7c13f88ecc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 30 additions and 30 deletions

View File

@ -631,7 +631,7 @@ func.func @mbarrier_txcount_pred() {
%mine = arith.constant 1 : index
// CHECK: %[[c0:.+]] = arith.constant 0 : index
// CHECK: %[[mid:.+]] = builtin.unrealized_conversion_cast %[[c0]] : index to i64
-// CHECK: %[[S2:.+]] = gpu.thread_id  x
+// CHECK: %[[S2:.+]] = gpu.thread_id x
// CHECK: %[[P:.+]] = arith.cmpi eq, %[[S2]], %[[c0]] : index
%c0 = arith.constant 0 : index
%tidx = gpu.thread_id x

View File

@ -12,13 +12,13 @@ gpu.module @main_kernel {
}
{
// CHECK: %[[c0:.+]] = arith.constant 0 : index
-// CHECK: %[[S0:.+]] = gpu.thread_id  x
+// CHECK: %[[S0:.+]] = gpu.thread_id x
// CHECK: %[[S1:.+]] = arith.cmpi eq, %[[S0]], %[[c0]] : index
// CHECK: %[[S2:.+]] = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
// CHECK: %[[S3:.+]] = memref.view %[[S2]][%[[c0]]][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>
// CHECK: nvgpu.tma.async.store %[[S3]] to %[[arg0]][%[[c0]], %[[c0]]], predicate = %[[S1]] : memref<128x32xf32, #gpu.address_space<workgroup>> -> <tensor = memref<128x32xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>
%c0 = arith.constant 0 : index
-%0 = gpu.thread_id  x
+%0 = gpu.thread_id x
%1 = arith.cmpi eq, %0, %c0 : index
%2 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
%view = memref.view %2[%c0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>

View File

@ -38,7 +38,7 @@ func.func @main() {
// CHECK: gpu.barrier
//
// CHECK: %[[c0:.*]] = arith.constant 0 : index
-// CHECK: %[[TIDX:.*]] = gpu.thread_id  x
+// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[TIDX]], %[[c0]] : index
//
// CHECK: scf.if %[[CMP]] {

View File

@ -13,7 +13,7 @@ func.func @matmul_16x8x4xf32_global(
// CHECK-SAME: %[[VAL_1:.*]]: memref<4x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<16x8xf32>) {
-// CHECK: %[[TIDX:.*]] = gpu.thread_id  x
+// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[VAL_4:.*]] = affine.apply #[[$div4]]()[%[[TIDX]]]
// CHECK: %[[VAL_5:.*]] = affine.apply #[[$mod4]]()[%[[TIDX]]]
// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_0]][%[[VAL_4]], %[[VAL_5]]] : memref<16x4xf32>

View File

@ -61,7 +61,7 @@ main(alpha)
# DUMPIR: %[[C1_2:.*]] = arith.constant 1 : index
# DUMPIR: %[[C1_3:.*]] = arith.constant 1 : index
# DUMPIR: gpu.launch blocks(%arg1, %arg2, %arg3) in (%arg7 = %[[C1]], %arg8 = %[[C1_0]], %arg9 = %[[C1_1]]) threads(%arg4, %arg5, %arg6) in (%arg10 = %[[C4]], %arg11 = %[[C1_2]], %arg12 = %[[C1_3]]) dynamic_shared_memory_size %[[C0_I32]] {
-# DUMPIR: %[[TIDX:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TIDX:.*]] = gpu.thread_id x
# DUMPIR: %[[MYVAL:.*]] = arith.addi %arg0, %[[TIDX]] : index
# DUMPIR: gpu.printf "GPU thread %llu has %llu\0A", %[[TIDX]], %[[MYVAL]] : index, index
# DUMPIR: gpu.terminator

View File

@ -143,7 +143,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[C64_5:.*]] = arith.constant 64 : index
# DUMPIR: %[[C64_6:.*]] = arith.constant 64 : index
# DUMPIR: %[[TMA1:.*]] = nvgpu.tma.create.descriptor %[[CAST1]] box[%[[C64_5]], %[[C64_6]]] : memref<*xf16> -> <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
-# DUMPIR: %[[THREADID:.*]] = gpu.thread_id  x
+# DUMPIR: %[[THREADID:.*]] = gpu.thread_id x
# DUMPIR: %[[MB:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>>
# DUMPIR: %[[C0:.*]] = arith.constant 0 : index
# DUMPIR: %[[EQ:.*]] = arith.cmpi eq, %[[THREADID]], %[[C0]] : index

View File

@ -342,14 +342,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[C6:.*]] = arith.constant 6 : index
# DUMPIR: %[[C1_PROLOGUE:.*]] = arith.constant 1 : index
# DUMPIR: scf.for %arg15 = %[[C0_PROLOGUE]] to %[[C6]] step %[[C1_PROLOGUE]] {
-# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id y
# DUMPIR: %[[C128_P1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_P:.*]] = arith.muli %[[BID_X_P]], %[[C128_P1]] : index
# DUMPIR: %[[C128_P2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMY_P:.*]] = arith.muli %[[BID_Y_P]], %[[C128_P2]] : index
-# DUMPIR: %{{.*}} = gpu.thread_id  x
-# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id  x
+# DUMPIR: %{{.*}} = gpu.thread_id x
+# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id x
# DUMPIR: %[[C0_P:.*]] = arith.constant 0 : index
# DUMPIR: %[[PRED_P:.*]] = arith.cmpi eq, %[[TID_X_P]], %[[C0_P]] : index
# DUMPIR: %[[C16384_P1:.*]] = arith.constant 16384 : index
@ -376,7 +376,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[DIMY_P_OFF:.*]] = arith.addi %[[DIMY_P]], %[[C64_OFF]] : index
# DUMPIR: nvgpu.tma.async.load %{{.*}}[%[[DIMY_P_OFF]], %[[K_COORD_P]]], %{{.*}}[%arg15] to %[[VIEW_B2_P]], predicate = %[[PRED_P]] : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7> -> memref<64x64xf16, #gpu.address_space<workgroup>>
# DUMPIR: }
-# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id x
# DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
# DUMPIR: %[[FALSE_LOOP:.*]] = arith.constant false
# DUMPIR: %[[C0_LOOP:.*]] = arith.constant 0 : index
@ -409,14 +409,14 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[STAGE_NEXT_L:.*]] = arith.addi %arg15, %[[C6_STAGE]] : index
# DUMPIR: %[[C7_MOD:.*]] = arith.constant 7 : index
# DUMPIR: %[[STAGE_LOAD:.*]] = arith.remui %[[STAGE_NEXT_L]], %[[C7_MOD]] : index
-# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id y
# DUMPIR: %[[C128_L1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_L:.*]] = arith.muli %[[BID_X_L]], %[[C128_L1]] : index
# DUMPIR: %[[C128_L2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMY_L:.*]] = arith.muli %[[BID_Y_L]], %[[C128_L2]] : index
-# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id  x
-# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id x
+# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id x
# DUMPIR: %[[C16384_LA1:.*]] = arith.constant 16384 : index
# DUMPIR: %[[OFF_A_LOAD:.*]] = arith.muli %[[STAGE_LOAD]], %[[C16384_LA1]] : index
# DUMPIR: %[[C16384_LA2:.*]] = arith.constant 16384 : index
@ -448,9 +448,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: scf.yield %[[ACC_L]], %[[NEW_PARITY]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
# DUMPIR: }
# DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id  x
-# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id x
+# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y
# DUMPIR: %[[C128_EPI1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI1]] : index
# DUMPIR: %[[C128_EPI2:.*]] = arith.constant 128 : index

View File

@ -324,7 +324,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# CHECK-NOT: Mismatched elements
# CHECK: PASS
-# DUMPIR: %[[TID_X:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X:.*]] = gpu.thread_id x
# DUMPIR: %[[C128:.*]] = arith.constant 128 : index
# DUMPIR: %[[REM1:.*]] = arith.remui %[[TID_X]], %[[C128]] : index
# DUMPIR: %[[C0:.*]] = arith.constant 0 : index
@ -333,7 +333,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[DIV1:.*]] = arith.divui %[[TID_X]], %[[C128_1]] : index
# DUMPIR: %[[C1:.*]] = arith.constant 1 : index
# DUMPIR: %[[IS_PRODUCER:.*]] = arith.cmpi eq, %[[DIV1]], %[[C1]] : index
-# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id x
# DUMPIR: %[[C128_2:.*]] = arith.constant 128 : index
# DUMPIR: %[[REM2:.*]] = arith.remui %[[TID_X_2]], %[[C128_2]] : index
# DUMPIR: %[[C0_2:.*]] = arith.constant 0 : index
@ -342,7 +342,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[DIV2:.*]] = arith.divui %[[TID_X_2]], %[[C128_3]] : index
# DUMPIR: %[[C0_3:.*]] = arith.constant 0 : index
# DUMPIR: %[[IS_CONSUMER:.*]] = arith.cmpi eq, %[[DIV2]], %[[C0_3]] : index
-# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id x
# DUMPIR: %[[MBAR_MMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
# DUMPIR: %[[MBAR_TMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
# DUMPIR: %[[C0_4:.*]] = arith.constant 0 : index
@ -361,7 +361,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: nvgpu.tma.prefetch.descriptor %{{.*}} : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
# DUMPIR: }
# DUMPIR: scf.if %[[IS_PRODUCER]] {
-# DUMPIR: nvvm.setmaxregister  decrease 40
+# DUMPIR: nvvm.setmaxregister decrease 40
# DUMPIR: %[[TRUE:.*]] = arith.constant true
# DUMPIR: %[[C0_PROD:.*]] = arith.constant 0 : index
# DUMPIR: %[[C16:.*]] = arith.constant 16 : index
@ -376,13 +376,13 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: %[[TRUE_2:.*]] = arith.constant true
# DUMPIR: %[[FLIP:.*]] = arith.xori %arg16, %[[TRUE_2]] : i1
# DUMPIR: %[[PHASE:.*]] = arith.select %[[IS_LAST]], %[[FLIP]], %arg16 : i1
-# DUMPIR: %[[BID_X:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y:.*]] = gpu.block_id y
# DUMPIR: %[[C128_TILE:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_X:.*]] = arith.muli %[[BID_X]], %[[C128_TILE]] : index
# DUMPIR: %[[C128_TILE_2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_Y:.*]] = arith.muli %[[BID_Y]], %[[C128_TILE_2]] : index
-# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id x
# DUMPIR: %[[C16384:.*]] = arith.constant 16384 : index
# DUMPIR: %[[OFF_A:.*]] = arith.muli %[[SLOT]], %[[C16384]] : index
# DUMPIR: %[[C16384_2:.*]] = arith.constant 16384 : index
@ -414,7 +414,7 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: }
# DUMPIR: }
# DUMPIR: scf.if %[[IS_CONSUMER]] {
-# DUMPIR: nvvm.setmaxregister  increase 232
+# DUMPIR: nvvm.setmaxregister increase 232
# DUMPIR: %[[FALSE:.*]] = arith.constant false
# DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
# DUMPIR: %[[C0_CONS:.*]] = arith.constant 0 : index
@ -456,9 +456,9 @@ if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
# DUMPIR: scf.yield %[[ACC]], %[[PHASE_CONS]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
# DUMPIR: }
# DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id  x
-# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id x
+# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y
# DUMPIR: %[[C128_EPI:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_X_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI]] : index
# DUMPIR: %[[C128_EPI_2:.*]] = arith.constant 128 : index