[libclc] Fix memory_scope and memory_order of *mem_fence builtins (#181311)

See OpenCL spec 6.15.12.5.
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences

// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags)        // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags)   // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags)  // memory_order_release

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Wenju He 2026-02-14 09:47:53 +08:00 committed by GitHub
parent edae8a4485
commit 75fcaef3b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 10 deletions

View File

@ -10,17 +10,22 @@
#include <clc/opencl/synchronization/utils.h>
// Legacy OpenCL fence. Per OpenCL C spec section 6.15.12.5 this is equivalent
// to atomic_work_item_fence with the same flags, memory_order_acq_rel and
// memory_scope_work_group, so scope and order are fixed here; only the memory
// semantics are derived from `flags`.
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_ACQ_REL;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// Legacy OpenCL read fence. Per OpenCL C spec section 6.15.12.5 this is
// equivalent to atomic_work_item_fence with the same flags,
// memory_order_acquire and memory_scope_work_group. It is issued directly
// rather than through mem_fence(), which would use the stronger acq_rel order.
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_ACQUIRE;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// Legacy OpenCL write fence. Per OpenCL C spec section 6.15.12.5 this is
// equivalent to atomic_work_item_fence with the same flags,
// memory_order_release and memory_scope_work_group. It is issued directly
// rather than through mem_fence(), which would use the stronger acq_rel order.
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_RELEASE;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}

View File

@ -10,17 +10,22 @@
#include <clc/opencl/synchronization/utils.h>
// Legacy OpenCL fence. Per OpenCL C spec section 6.15.12.5 this is equivalent
// to atomic_work_item_fence with the same flags, memory_order_acq_rel and
// memory_scope_work_group, so scope and order are fixed here; only the memory
// semantics are derived from `flags`.
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_ACQ_REL;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// Legacy OpenCL read fence. Per OpenCL C spec section 6.15.12.5 this is
// equivalent to atomic_work_item_fence with the same flags,
// memory_order_acquire and memory_scope_work_group. It is issued directly
// rather than through mem_fence(), which would use the stronger acq_rel order.
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_ACQUIRE;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// Legacy OpenCL write fence. Per OpenCL C spec section 6.15.12.5 this is
// equivalent to atomic_work_item_fence with the same flags,
// memory_order_release and memory_scope_work_group. It is issued directly
// rather than through mem_fence(), which would use the stronger acq_rel order.
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
  int memory_scope = __MEMORY_SCOPE_WRKGRP;
  int memory_order = __ATOMIC_RELEASE;
  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}