[OFFLOAD] Add spirv implementation for named barrier (#180393)

This change adds implementation for named barriers for SPIRV backend.
Since there is no built in API/intrinsics for named barrier in SPIRV,
the implementation loosely follows implementation for AMD
This commit is contained in:
fineg74 2026-03-27 12:14:09 -07:00 committed by GitHub
parent 3c625a179f
commit 1611a23a5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
88 changed files with 56 additions and 96 deletions

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-and-run-generic
// XFAIL: intelgpu
#include <assert.h>
#include <omp.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,7 +1,6 @@
// RUN: %libomptarget-compile-generic
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic
// XFAIL: intelgpu
#include <stdio.h>
#include <stdlib.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <cstdlib>
#include <cstdio>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <cstdlib>
#include <cstdio>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <cstdlib>
#include <cstdio>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <cstdio>
#include <cstdlib>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <cstdio>
#include <cstdlib>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// clang-format off
// RUN: %libomptarget-compilexx-generic -Wno-openmp-mapping && %libomptarget-run-generic
// XFAIL: intelgpu
// clang-format on

View File

@ -1,6 +1,5 @@
// clang-format off
// RUN: %libomptarget-compilexx-generic -Wno-openmp-mapping && %libomptarget-run-generic
// XFAIL: intelgpu
#include <assert.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compileopt-generic -fno-exceptions
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// XFAIL: intelgpu
#include <stdint.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compile-generic -fopenmp-extensions
// RUN: %libomptarget-run-generic | %fcheck-generic -strict-whitespace
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -3,7 +3,6 @@
// Clang 6.0 doesn't use the new map interface, undefined behavior when
// the compiler emits "old" interface code for structures.
// UNSUPPORTED: clang-6
// XFAIL: intelgpu
#include <stdio.h>
#include <stdlib.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -15,7 +15,6 @@
// RUN: -DEXTENDS=AFTER
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic
// XFAIL: intelgpu
// END.

View File

@ -1,7 +1,6 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic
// XFAIL: intelgpu
extern "C" int printf(const char *, ...);
template <typename T> class A {

View File

@ -1,7 +1,6 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {

View File

@ -7,7 +7,8 @@
// RUN: %libomptarget-run-generic | %fcheck-generic
// RUN: %libomptarget-compileopt-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic | %fcheck-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
// High parallelism increases our chances of detecting a lack of atomicity.
#define NUM_THREADS_TRY 256

View File

@ -1,6 +1,7 @@
// clang-format off
// RUN: %libomptarget-compile-generic -O3 && %libomptarget-run-generic | %fcheck-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
// clang-format on
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// RUN: %libomptarget-compileoptxx-run-and-check-generic
// XFAIL: intelgpu
#include <cassert>
#include <iostream>

View File

@ -3,7 +3,8 @@
// RUN: %libomptarget-compilexx-generic -O3 -ffast-math && %libomptarget-run-generic
// RUN: %libomptarget-compileoptxx-generic -O3 && %libomptarget-run-generic
// RUN: %libomptarget-compileoptxx-generic -O3 -ffast-math && %libomptarget-run-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
// clang-format on
#include <iostream>

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compilexx-and-run-generic
// RUN: %libomptarget-compileoptxx-and-run-generic
// XFAIL: intelgpu
#include <cassert>
#include <iostream>

View File

@ -33,7 +33,8 @@
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
//
// CUSTOM: Rewriting generic-mode kernel with a customized state machine.

View File

@ -1,7 +1,8 @@
// RUN: %libomptarget-compile-generic -O2 && %libomptarget-run-generic
// -O2 to run openmp-opt
// RUN: %libomptarget-compileopt-generic -O2 && %libomptarget-run-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
int main(void) {
long int aa = 0;

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compilexx-and-run-generic
// RUN: %libomptarget-compileoptxx-and-run-generic
// XFAIL: intelgpu
#include <cassert>
#include <iostream>

View File

@ -6,6 +6,7 @@
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: nvptx64-nvidia-cuda
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,7 +1,6 @@
// clang-format off
// RUN: %libomptarget-compile-generic -DVAR -c -o %t.o
// RUN: %libomptarget-compile-generic %t.o && %libomptarget-run-generic | %fcheck-generic
// XFAIL: intelgpu
// clang-format on
#ifdef VAR
int x = 1;

View File

@ -1,6 +1,7 @@
// RUN: %libomptarget-compile-run-and-check-generic
// RUN: %libomptarget-compileopt-run-and-check-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// clang-format off
// RUN: %libomptarget-compilexx-generic && %libomptarget-run-generic | %fcheck-generic
// XFAIL: intelgpu
// clang-format on
#include <cstdio>

View File

@ -6,7 +6,6 @@
// - Works whether it's specified directly or as the default device.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -9,7 +9,8 @@
// clang-format on
// REQUIRES: gpu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -2,7 +2,6 @@
// RUN: %libomptarget-compile-generic -DLIBRARY -c -o %t.o
// RUN: ar rcs %t.a %t.o
// RUN: %libomptarget-compile-generic %t.a && %libomptarget-run-generic 2>&1 | %fcheck-generic
// XFAIL: intelgpu
// clang-format on
#ifdef LIBRARY

View File

@ -3,7 +3,6 @@
// from the device to the host.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -3,7 +3,6 @@
// from the host to the device.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -3,7 +3,6 @@
// across the array
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -3,7 +3,6 @@
// across the array
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -3,7 +3,6 @@
// from the device to the host using dynamically allocated memory.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -3,7 +3,6 @@
// from the host to the device using dynamically allocated memory.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -3,7 +3,6 @@
// across the array using dynamically allocated memory.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -3,7 +3,6 @@
// across the array using dynamically allocated memory.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -4,7 +4,6 @@
// where the count (len/2) is a variable expression, not a constant.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -4,7 +4,6 @@
// other element (stride 2) from the device to the host
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -4,7 +4,6 @@
// same array with various count expressions.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests multiple arrays with different variable strides in single update
// clause.

View File

@ -4,7 +4,6 @@
// other element (stride 2) from the host to the device
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests combining variable count expression AND variable stride in array
// sections.

View File

@ -2,7 +2,6 @@
// Tests data[0:5:stride] where stride is a variable, making it non-contiguous.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Miscellaneous variable stride tests: stride=1, stride=array_size, stride from
// array subscript.

View File

@ -3,7 +3,6 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// XFAIL: intelgpu
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compileoptxx-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -4,8 +4,7 @@
// UNSUPPORTED: nvptx64-nvidia-cuda
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
// UNSUPPORTED: amdgcn-amd-amdhsa
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compilexx-run-and-check-generic
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,5 @@
// RUN: %libomptarget-compilexx-and-run-generic
// RUN: %libomptarget-compileoptxx-and-run-generic
// XFAIL: intelgpu
#include <cassert>

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that "update from" clause in OpenMP supports strided
// sections. #pragma omp target update from(result[0:N/2:2]) updates every other
// element from device

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests non-contiguous array sections with expression-based count on
// heap-allocated pointer arrays with both FROM and TO directives.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests heap-allocated pointers with both variable count expression and
// variable stride.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests non-contiguous array sections with variable stride on heap-allocated
// pointers.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests non-contiguous array sections with expression-based count on struct
// member arrays with both FROM and TO directives.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that "update from" with user-defined mapper supports strided
// sections using fixed-size arrays in structs.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that #pragma omp target update from(s1.data[0:6:2],
// s2.data[0:4:3]) correctly updates strided sections covering the full arrays
// from device to host.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that #pragma omp target update to(s1.data[0:6:2],
// s2.data[0:4:3]) correctly updates strided sections covering the full arrays
// from host to device.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that #pragma omp target update from(s.data[0:2:3]) correctly
// updates every third element (stride 3) from the device to the host
// using a struct with fixed-size array member.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that #pragma omp target update to(s.data[0:2:3]) correctly
// updates every third element (stride 3) from the host to the device
// for struct member arrays.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that "update to" with struct member arrays supports strided
// sections using fixed-size arrays in structs.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests struct member arrays with both variable count expression and variable
// stride.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// Tests non-contiguous array sections with variable stride on struct member
// arrays.

View File

@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// This test checks that "update to" clause in OpenMP supports strided sections.
// #pragma omp target update to(result[0:8:2]) updates every other element
// (stride 2)

View File

@ -1,6 +1,7 @@
// RUN: %libomptarget-compile-run-and-check-generic
// RUN: %libomptarget-compileopt-run-and-check-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -1,6 +1,7 @@
// This fails when optimized for now.
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
// XUN: %libomptarget-compileopt-run-and-check-generic
#include <omp.h>

View File

@ -12,7 +12,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -12,7 +12,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -10,7 +10,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -12,7 +12,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -13,7 +13,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
struct S {};

View File

@ -12,7 +12,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -10,7 +10,8 @@
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: x86_64-unknown-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu
// XFAIL: intelgpu
// https://github.com/llvm/llvm-project/issues/182119
// UNSUPPORTED: intelgpu
#include <omp.h>

View File

@ -2,7 +2,6 @@
// REQUIRES: unified_shared_memory
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
// XFAIL: intelgpu
#include <assert.h>
#include <omp.h>

View File

@ -2,7 +2,6 @@
// REQUIRES: unified_shared_memory
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
// XFAIL: intelgpu
#include <omp.h>
#include <stdio.h>

View File

@ -181,8 +181,27 @@ void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
///}
#if defined(__SPIRV__)
void namedBarrierInit() { __builtin_trap(); } // TODO
void namedBarrier() { __builtin_trap(); } // TODO
[[clang::loader_uninitialized]] Local<uint32_t> namedBarrierTracker;
void namedBarrierInit() {
atomic::store(&namedBarrierTracker, 0u, atomic::seq_cst, atomic::workgroup);
}
void namedBarrier() {
uint32_t NumThreads = omp_get_num_threads();
uint32_t load =
atomic::add(&namedBarrierTracker, 1, atomic::seq_cst, atomic::workgroup);
if (load >= NumThreads - 1) {
atomic::store(&namedBarrierTracker, 0u, atomic::seq_cst, atomic::workgroup);
} else {
do {
load = atomic::load(&namedBarrierTracker, atomic::seq_cst,
atomic::workgroup);
} while (load != 0);
}
}
void unsetLock(omp_lock_t *Lock) {
atomic::store((int32_t *)Lock, 0, atomic::seq_cst);
@ -194,13 +213,7 @@ void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
void setLock(omp_lock_t *Lock) {
int32_t *Lock_ptr = (int32_t *)Lock;
bool Acquired = false;
int32_t Expected;
while (!Acquired) {
Expected = 0;
if (Expected == atomic::load(Lock_ptr, atomic::seq_cst))
Acquired =
atomic::cas(Lock_ptr, Expected, 1, atomic::seq_cst, atomic::seq_cst);
while (!atomic::cas(Lock_ptr, 0, 1, atomic::seq_cst, atomic::seq_cst)) {
}
}