[mlir][GPU] Add constant address space to GPU dialect (#190211)

This PR adds a `constant` address space to the GPU dialect and
lowerings to all GPU backends.

Signed-off-by: Bangtian Liu <liubangtian@gmail.com>
This commit is contained in:
Bangtian Liu 2026-04-02 15:02:12 -04:00 committed by GitHub
parent f7b6cc1efa
commit 86b5f11ecc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 155 additions and 3 deletions

View File

@ -28,13 +28,16 @@ is being used.
## GPU address spaces
The GPU dialect exposes the `gpu.address_space` attribute, which currently has
three values: `global`, `workgroup`, and `private`.
four values: `global`, `workgroup`, `private`, and `constant`.
These address spaces represent the types of buffer commonly seen in GPU compilation.
`global` memory is memory that resides in the GPU's global memory. `workgroup`
memory is a limited, per-workgroup resource: all threads in a workgroup/thread
block access the same values in `workgroup` memory. Finally, `private` memory is
block access the same values in `workgroup` memory. `private` memory is
used to represent `alloca`-like buffers that are private to a single thread/workitem.
`constant` memory is read-only memory residing in the global address space, guaranteed
not to change during kernel execution, allowing backend-specific optimizations
(e.g., scalar reads on AMD GPUs).
These address spaces may be used as the `memorySpace` attribute on `memref` values.
The `gpu.module`/`gpu.func` compilation pipeline will lower such memory space

View File

@ -53,6 +53,10 @@ def GPU_Dialect : Dialect {
/// space.
static AddressSpace getPrivateAddressSpace() { return AddressSpace::Private; }
/// Returns the numeric value used to identify the constant memory address
/// space.
static AddressSpace getConstantAddressSpace() { return AddressSpace::Constant; }
/// Return true if the given MemRefType has an address space that matches
/// the gpu::AddressSpaceAttr attribute with value 'workgroup'.
static bool hasWorkgroupMemoryAddressSpace(MemRefType type);
@ -60,6 +64,14 @@ def GPU_Dialect : Dialect {
/// Return true if the given Attribute is a gpu::AddressSpaceAttr
/// attribute with value 'workgroup'.
static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);
/// Return true if the given MemRefType has an address space that matches
/// the gpu::AddressSpaceAttr attribute with value 'constant'.
static bool hasConstantMemoryAddressSpace(MemRefType type);
/// Return true if the given Attribute is a gpu::AddressSpaceAttr
/// attribute with value 'constant'.
static bool isConstantMemoryAddressSpace(Attribute memorySpace);
}];
let discardableAttrs = (ins
"::mlir::DenseI32ArrayAttr":$known_block_size,
@ -89,11 +101,13 @@ class GPU_I32EnumAttr<string mnemonic, GPU_I32Enum enumInfo> :
def GPU_AddressSpaceGlobal : I32EnumAttrCase<"Global", 1, "global">;
def GPU_AddressSpaceWorkgroup : I32EnumAttrCase<"Workgroup", 2, "workgroup">;
def GPU_AddressSpacePrivate : I32EnumAttrCase<"Private", 3, "private">;
def GPU_AddressSpaceConstant : I32EnumAttrCase<"Constant", 4, "constant">;
def GPU_AddressSpaceEnum : GPU_I32Enum<
"AddressSpace", "GPU address space", [
GPU_AddressSpaceGlobal,
GPU_AddressSpaceWorkgroup,
GPU_AddressSpacePrivate
GPU_AddressSpacePrivate,
GPU_AddressSpaceConstant
]>;
def GPU_AddressSpaceAttr :

View File

@ -131,6 +131,8 @@ def ROCDL_Dialect : Dialect {
static constexpr unsigned kGlobalMemoryAddressSpace = 1;
/// The address space value that represents shared memory.
static constexpr unsigned kSharedMemoryAddressSpace = 3;
/// The address space value that represents constant memory.
static constexpr unsigned kConstantMemoryAddressSpace = 4;
/// The address space value that represents private memory.
static constexpr unsigned kPrivateMemoryAddressSpace = 5;
}];

View File

@ -4105,6 +4105,8 @@ void mlir::amdgpu::populateCommonGPUTypeAndAttributeConversions(
return ROCDL::ROCDLDialect::kSharedMemoryAddressSpace;
case gpu::AddressSpace::Private:
return ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace;
case gpu::AddressSpace::Constant:
return ROCDL::ROCDLDialect::kConstantMemoryAddressSpace;
}
llvm_unreachable("unknown address space enum value");
});

View File

@ -17,6 +17,8 @@ spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace) {
return spirv::StorageClass::Workgroup;
case gpu::AddressSpace::Private:
return spirv::StorageClass::Private;
case gpu::AddressSpace::Constant:
return spirv::StorageClass::UniformConstant;
}
llvm_unreachable("Unhandled storage class");
}

View File

@ -138,6 +138,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
memFenceFlag = memFenceFlag | localMemFenceFlag;
break;
case gpu::AddressSpace::Private:
case gpu::AddressSpace::Constant:
// Private is thread-local, constant is read-only; no fencing needed.
break;
}
}

View File

@ -545,6 +545,8 @@ struct GPUBarrierOpLowering final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
fenceLDS = true;
break;
case gpu::AddressSpace::Private:
case gpu::AddressSpace::Constant:
// Private is thread-local, constant is read-only; no fencing needed.
break;
}
}

View File

@ -1723,6 +1723,8 @@ void mlir::nvgpu::populateCommonGPUTypeAndAttributeConversions(
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case gpu::AddressSpace::Private:
return 0;
case gpu::AddressSpace::Constant:
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Constant);
}
llvm_unreachable("unknown address space enum value");
});

View File

@ -248,6 +248,19 @@ bool GPUDialect::hasWorkgroupMemoryAddressSpace(MemRefType type) {
return isWorkgroupMemoryAddressSpace(memorySpace);
}
/// Returns true when `memorySpace` is a gpu::AddressSpaceAttr whose value
/// equals the one returned by getConstantAddressSpace(). A null attribute or
/// an attribute of any other kind yields false.
bool GPUDialect::isConstantMemoryAddressSpace(Attribute memorySpace) {
  // Reject a null attribute up front, before attempting the cast below.
  if (!memorySpace)
    return false;

  auto addressSpaceAttr = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace);
  return addressSpaceAttr &&
         addressSpaceAttr.getValue() == getConstantAddressSpace();
}
/// Returns true when the memory space attribute attached to `type` satisfies
/// isConstantMemoryAddressSpace().
bool GPUDialect::hasConstantMemoryAddressSpace(MemRefType type) {
  return isConstantMemoryAddressSpace(type.getMemorySpace());
}
bool GPUDialect::isKernel(Operation *op) {
UnitAttr isKernelAttr = op->getAttrOfType<UnitAttr>(getKernelFuncAttrName());
return static_cast<bool>(isKernelAttr);

View File

@ -0,0 +1,26 @@
// RUN: mlir-opt %s -convert-gpu-to-llvm-spv | FileCheck %s
gpu.module @kernels {
// CHECK-LABEL: llvm.func spir_kernelcc @constant_load
// Constant address space maps to SPIRV/OpenCL address space 2 (UniformConstant)
// CHECK-SAME: !llvm.ptr<2>
gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
gpu.return
}
// CHECK-LABEL: llvm.func spir_funccc @all_address_spaces
// Global -> 1, Workgroup -> 3, Private -> 0 (default), Constant -> 2
// CHECK-SAME: !llvm.ptr<1>
// CHECK-SAME: !llvm.ptr<3>
// CHECK-SAME: !llvm.ptr,
// CHECK-SAME: !llvm.ptr<2>
gpu.func @all_address_spaces(
%arg0: memref<f32, #gpu.address_space<global>>,
%arg1: memref<f32, #gpu.address_space<workgroup>>,
%arg2: memref<f32, #gpu.address_space<private>>,
%arg3: memref<f32, #gpu.address_space<constant>>) {
gpu.return
}
}

View File

@ -238,6 +238,10 @@ gpu.module @barriers {
// CHECK: [[NONE_FLAG2:%.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG2]])
gpu.barrier memfence [#gpu.address_space<private>]
// Constant memory is read-only, no fencing needed (same as private)
// CHECK: [[NONE_FLAG3:%.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG3]])
gpu.barrier memfence [#gpu.address_space<constant>]
return
}
}

View File

@ -0,0 +1,22 @@
// RUN: mlir-opt -convert-gpu-to-nvvm %s | FileCheck %s
module attributes {gpu.container_module} {
gpu.module @kernel_module {
// CHECK-LABEL: llvm.func @constant_load
// CHECK-SAME: %{{.*}}: !llvm.ptr<4>
gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
gpu.return
}
// CHECK-LABEL: llvm.func @constant_multidim
// CHECK-SAME: %{{.*}}: !llvm.ptr<4>
gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
gpu.return
}
}
}

View File

@ -0,0 +1,22 @@
// RUN: mlir-opt -convert-gpu-to-rocdl %s | FileCheck %s
module attributes {gpu.container_module} {
gpu.module @kernel_module {
// CHECK-LABEL: llvm.func @constant_load
// CHECK-SAME: %{{.*}}: !llvm.ptr<4>
gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
gpu.return
}
// CHECK-LABEL: llvm.func @constant_multidim
// CHECK-SAME: %{{.*}}: !llvm.ptr<4>
gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
gpu.return
}
}
}

View File

@ -65,4 +65,16 @@ func.func @barrier_private_only() {
gpu.barrier memfence [#gpu.address_space<private>]
func.return
}
// GFX9-LABEL: func @barrier_constant_only
// GFX12-LABEL: func @barrier_constant_only
func.func @barrier_constant_only() {
// GFX9-NEXT: rocdl.s.barrier
// GFX12-NEXT: rocdl.s.barrier.signal id = -1
// GFX12-NEXT: rocdl.s.barrier.wait id = -1
// CHECK-NOT: llvm.fence
// Constant memory is read-only, no fencing needed
gpu.barrier memfence [#gpu.address_space<constant>]
func.return
}
}

View File

@ -0,0 +1,23 @@
// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
gpu.module @test {
// CHECK-LABEL: @constant_memref_basic
// CHECK-SAME: (%{{.*}}: memref<16xf32, #gpu.address_space<constant>>)
gpu.func @constant_memref_basic(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
%c0 = arith.constant 0 : index
%0 = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
gpu.return
}
// CHECK-LABEL: @constant_memref_multidim
// CHECK: memref<4x8xf32, #gpu.address_space<constant>>
gpu.func @constant_memref_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
gpu.return
}
// CHECK-LABEL: @constant_memref_dynamic
// CHECK: memref<?x?xf32, #gpu.address_space<constant>>
gpu.func @constant_memref_dynamic(%arg0: memref<?x?xf32, #gpu.address_space<constant>>) kernel {
gpu.return
}
}

View File

@ -186,6 +186,7 @@ module attributes {gpu.container_module} {
gpu.barrier memfence [#gpu.address_space<global>]
gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
gpu.barrier memfence [#gpu.address_space<private>]
gpu.barrier memfence [#gpu.address_space<constant>]
gpu.barrier memfence []
"some_op"(%bIdX, %tIdX) : (index, index) -> ()