From 1474e3e4f46550f66ed7ab4c5b2810ffadb630f1 Mon Sep 17 00:00:00 2001
From: xys-syx
Date: Thu, 2 Apr 2026 14:41:50 -0500
Subject: [PATCH] [MLIR][NVVM] Derive NVVM_SyncWarpOp from NVVM_IntrOp for import support (#188415)

Change `NVVM_SyncWarpOp` base class from `NVVM_Op` to
`NVVM_IntrOp<"bar.warp.sync">`, which auto-generates
`llvmEnumName = nvvm_bar_warp_sync` and registers it with
`-gen-intr-from-llvmir-conversions` and
`-gen-convertible-llvmir-intrinsics`. This enables LLVM IR to MLIR
import. The hand-written `llvmBuilder` is removed as the default
`LLVM_IntrOpBase` builder is equivalent.
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 5 +----
 mlir/test/Target/LLVMIR/Import/nvvmir.ll    | 9 +++++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 87fd75f5a3e1..82fbdf8e2996 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1592,7 +1592,7 @@ def NVVM_VoteSyncOp
 }
 
 def NVVM_SyncWarpOp :
-  NVVM_Op<"bar.warp.sync">,
+  NVVM_IntrOp<"bar.warp.sync">,
   Arguments<(ins I32:$mask)> {
   let summary = "Warp Barrier Synchronization Op";
   let description = [{
@@ -1621,9 +1621,6 @@ def NVVM_SyncWarpOp :
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar-warp-sync)
   }];
-  string llvmBuilder = [{
-    createIntrinsicCall(builder, llvm::Intrinsic::nvvm_bar_warp_sync, {$mask});
-  }];
   let assemblyFormat = "$mask attr-dict `:` type($mask)";
 }
 
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
index 2da0b0ceb2cf..1430f9a44eba 100644
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -78,6 +78,13 @@ define void @llvm_nvvm_barrier0() {
   ret void
 }
 
+; CHECK-LABEL: @llvm_nvvm_bar_warp_sync
+define void @llvm_nvvm_bar_warp_sync(i32 %mask) {
+  ; CHECK: nvvm.bar.warp.sync %{{.*}} : i32
+  call void @llvm.nvvm.bar.warp.sync(i32 %mask)
+  ret void
+}
+
 ; TODO: Support the intrinsics below once they derive from NVVM_IntrOp rather than from NVVM_Op.
 ;
 ; define i32 @nvvm_shfl(i32 %0, i32 %1, i32 %2, i32 %3, float %4) {
@@ -269,6 +276,8 @@ declare float @llvm.nvvm.rcp.approx.ftz.f(float)
 
 declare void @llvm.nvvm.barrier0()
 
+declare void @llvm.nvvm.bar.warp.sync(i32)
+
 declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32)
 
 declare float @llvm.nvvm.shfl.sync.bfly.f32(i32, float, i32, i32)