From 75fcaef3b60367e293e4208c8d88803f8650f3e1 Mon Sep 17 00:00:00 2001
From: Wenju He
Date: Sat, 14 Feb 2026 09:47:53 +0800
Subject: [PATCH] [libclc] Fix memory_scope and memory_order of *mem_fence builtins (#181311)

See OpenCL spec 6.15.12.5.
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences
// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 libclc/opencl/lib/amdgcn/mem_fence/fence.cl       | 15 ++++++++++-----
 libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 15 ++++++++++-----
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 7e5d97bc6de6..38fb15c2c1de 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -10,17 +10,22 @@
 #include
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We don't have separate mechanism for read and write fences
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index c799cf2ad7dd..38fb15c2c1de 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -10,17 +10,22 @@
 #include
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We do not have separate mechanism for read and write fences.
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }