From 161c53f66ea8044e6618d7c7829ba90196c72a79 Mon Sep 17 00:00:00 2001 From: David Truby Date: Tue, 31 Mar 2026 13:07:19 +0100 Subject: [PATCH] [openmp] Add support for arm64ec to libomp (#176151) This patch adds arm64ec support to libomp. Note that this support isn't entirely usable on Windows hosts as libomp requires LLVM_PER_TARGET_RUNTIME_DIR=On to work correctly when multiple runtimes are built, which is unsupported on Windows. A following patch will add arm64x support to the build to rectify this. --- openmp/cmake/modules/LibompUtils.cmake | 2 + openmp/runtime/CMakeLists.txt | 6 +- .../runtime/cmake/LibompGetArchitecture.cmake | 3 + openmp/runtime/src/CMakeLists.txt | 1 + openmp/runtime/src/kmp.h | 3 +- openmp/runtime/src/kmp_atomic.cpp | 2 +- openmp/runtime/src/kmp_invoke_microtask.cpp | 140 ++++++++++++++++++ openmp/runtime/src/kmp_os.h | 10 +- openmp/runtime/src/kmp_platform.h | 8 +- openmp/runtime/src/kmp_runtime.cpp | 2 +- .../thirdparty/ittnotify/ittnotify_config.h | 2 + openmp/runtime/src/z_Linux_asm.S | 2 +- openmp/runtime/src/z_Linux_util.cpp | 139 ----------------- openmp/runtime/src/z_Windows_NT-586_util.cpp | 6 +- .../test/misc_bugs/many-microtask-args.c | 4 + 15 files changed, 178 insertions(+), 152 deletions(-) create mode 100644 openmp/runtime/src/kmp_invoke_microtask.cpp diff --git a/openmp/cmake/modules/LibompUtils.cmake b/openmp/cmake/modules/LibompUtils.cmake index 0de8e7dbe153..011e675d62e3 100644 --- a/openmp/cmake/modules/LibompUtils.cmake +++ b/openmp/cmake/modules/LibompUtils.cmake @@ -105,6 +105,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "AARCH64_32" PARENT_SCOPE) elseif(AARCH64_A64FX) set(${return_arch_string} "AARCH64_A64FX" PARENT_SCOPE) + elseif(ARM64EC) + set(${return_arch_string} "ARM64EC" PARENT_SCOPE) elseif(MIPS) set(${return_arch_string} "MIPS" PARENT_SCOPE) elseif(MIPS64) diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 39a969731661..680e845ffdc9
100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -42,6 +42,8 @@ elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64_32") set(LIBOMP_ARCH aarch64_32) elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64") set(LIBOMP_ARCH aarch64) +elseif(LIBOMP_NATIVE_ARCH MATCHES "arm64ec") + set(LIBOMP_ARCH arm64ec) elseif(LIBOMP_NATIVE_ARCH MATCHES "arm64") set(LIBOMP_ARCH aarch64) elseif(LIBOMP_NATIVE_ARCH MATCHES "arm") @@ -79,7 +81,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x sparc sparcv9 wasm32) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx arm64ec mic mips mips64 riscv64 loongarch64 ve s390x sparc sparcv9 wasm32) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -198,6 +200,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_32") # AARCH64_32 architecture set(AARCH64_32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_a64fx") # AARCH64_A64FX architecture set(AARCH64_A64FX TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "arm64ec") # ARM64EC architecture + set(ARM64EC TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "mic") # Intel(R) Many Integrated Core Architecture set(MIC TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index 81aa700e3b6d..56377a397bb4 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -17,6 +17,9 @@ function(libomp_get_architecture return_arch) set(detect_arch_src_txt " #if defined(__KNC__) #error ARCHITECTURE=mic + // arm64ec also defines _M_AMD64 so this needs to be checked before that + #elif defined(_M_ARM64EC) || defined(__arm64ec__) + #error ARCHITECTURE=arm64ec #elif 
defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) #error ARCHITECTURE=x86_64 #elif defined(__i386) || defined(__i386__) || defined(__IA32__) || defined(_M_I86) || defined(_M_IX86) || defined(__X86__) || defined(_X86_) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 97ffa2991678..5b3e437ba432 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -86,6 +86,7 @@ else() kmp_csupport.cpp kmp_debug.cpp kmp_itt.cpp + kmp_invoke_microtask.cpp kmp_environment.cpp kmp_error.cpp kmp_global.cpp diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 19deaef75415..ce2ddb01ad1a 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3902,7 +3902,8 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr); extern void __kmp_expand_host_name(char *buffer, size_t size); extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern); -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)) +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || \ + (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC)) extern void __kmp_initialize_system_tick(void); /* Initialize timer tick value */ #endif diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index 261e9f1beee6..d259737eb5ce 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -832,7 +832,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, // end of the first part of the workaround for C78287 #endif // USE_CMPXCHG_FIX -#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM) +#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC) // Undo explicit type casts to get MSVC ARM64 to build. 
Uses // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG #undef OP_CMPXCHG diff --git a/openmp/runtime/src/kmp_invoke_microtask.cpp b/openmp/runtime/src/kmp_invoke_microtask.cpp new file mode 100644 index 000000000000..f14f27cc06ee --- /dev/null +++ b/openmp/runtime/src/kmp_invoke_microtask.cpp @@ -0,0 +1,140 @@ +#include "kmp.h" + +#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || KMP_ARCH_AARCH64 || \ + KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \ + KMP_ARCH_AARCH64_32) + +// Because WebAssembly will use `call_indirect` to invoke the microtask and +// WebAssembly indirect calls check that the called signature is a precise +// match, we need to cast each microtask function pointer back from `void *` to +// its original type. +typedef void (*microtask_t0)(int *, int *); +typedef void (*microtask_t1)(int *, int *, void *); +typedef void (*microtask_t2)(int *, int *, void *, void *); +typedef void (*microtask_t3)(int *, int *, void *, void *, void *); +typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *); +typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *, + void *); +typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *, + void *, void *); +typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *, + void *, void *, void *); +typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *); +typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *); +typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *, void *); +typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *, void *, + void *); +typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *, + 
void *, void *, void *, void *, void *, void *, + void *, void *); +typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *, void *, + void *, void *, void *); +typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *, void *, + void *, void *, void *, void *); +typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *, + void *, void *, void *, void *, void *, void *, + void *, void *, void *, void *, void *); + +// we really only need the case with 1 argument, because CLANG always build +// a struct of pointers to shared variables referenced in the outlined function +int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, + void *p_argv[] +#if OMPT_SUPPORT + , + void **exit_frame_ptr +#endif +) { +#if OMPT_SUPPORT + *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +#endif + + switch (argc) { + default: + fprintf(stderr, "Too many args to microtask: %d!\n", argc); + fflush(stderr); + exit(-1); + case 0: + (*(microtask_t0)pkfn)(&gtid, &tid); + break; + case 1: + (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]); + break; + case 2: + (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); + break; + case 3: + (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); + break; + case 4: + (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3]); + break; + case 5: + (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4]); + break; + case 6: + (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5]); + break; + case 7: + (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6]); + break; + case 8: + (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7]); + break; + case 9: +
(*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], + p_argv[8]); + break; + case 10: + (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9]); + break; + case 11: + (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9], p_argv[10]); + break; + case 12: + (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11]); + break; + case 13: + (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12]); + break; + case 14: + (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13]); + break; + case 15: + (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], + p_argv[3], p_argv[4], p_argv[5], p_argv[6], + p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14]); + break; + } + + return 1; +} + +#endif diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index e8ad2a6fdb78..c5da19dc2407 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -153,7 +153,7 @@ typedef struct kmp_struct64 kmp_uint64; #undef KMP_USE_X87CONTROL #define KMP_USE_X87CONTROL 1 #endif -#if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 +#if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM64EC #define KMP_INTPTR 1 typedef __int64 kmp_intptr_t; typedef unsigned __int64 kmp_uintptr_t; @@ -183,7 +183,7 @@ typedef unsigned long long kmp_uint64; #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif
KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_SPARC64 + KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_SPARC64 || KMP_ARCH_ARM64EC #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier." @@ -464,7 +464,9 @@ enum kmp_mem_fence_type { // Synchronization primitives -#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) && (KMP_COMPILER_CLANG || KMP_COMPILER_GCC)) +#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && \ + !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC) && \ + (KMP_COMPILER_CLANG || KMP_COMPILER_GCC)) #if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG #pragma intrinsic(InterlockedExchangeAdd) @@ -1053,7 +1055,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || \ - KMP_ARCH_SPARC + KMP_ARCH_SPARC || KMP_ARCH_ARM64EC #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 609b7c468884..7d88c02e9cf3 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -119,6 +119,7 @@ #define KMP_ARCH_X86_64 0 #define KMP_ARCH_AARCH64 0 #define KMP_ARCH_AARCH64_32 0 +#define KMP_ARCH_ARM64EC 0 #define KMP_ARCH_PPC64_ELFv1 0 #define KMP_ARCH_PPC64_ELFv2 0 #define KMP_ARCH_PPC64_XCOFF 0 @@ -133,7 +134,10 @@ #define KMP_ARCH_SPARC 0 #if KMP_OS_WINDOWS -#if defined(_M_AMD64) || defined(__x86_64) +#if defined(_M_ARM64EC) || defined(__arm64ec__) +#undef KMP_ARCH_ARM64EC +#define KMP_ARCH_ARM64EC 1 +#elif defined(_M_AMD64) || defined(__x86_64) #undef KMP_ARCH_X86_64 #define KMP_ARCH_X86_64 1 #elif 
defined(__aarch64__) || defined(_M_ARM64) @@ -291,7 +295,7 @@ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \ KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC + \ - KMP_ARCH_AARCH64_32 + KMP_ARCH_SPARC) + KMP_ARCH_AARCH64_32 + KMP_ARCH_SPARC + KMP_ARCH_ARM64EC) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 3277a09c6497..c402645af9ad 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8924,7 +8924,7 @@ __kmp_determine_reduction_method( #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM + KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM || KMP_ARCH_ARM64EC #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU || \ diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h index bd3fd9b43e57..c2120a017f5d 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -173,6 +173,8 @@ #ifndef ITT_ARCH #if defined _M_IX86 || defined __i386__ #define ITT_ARCH ITT_ARCH_IA32 +#elif defined _M_ARM64EC || defined __arm64ec__ +#define ITT_ARCH ITT_ARCH_ARM64 #elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ #define ITT_ARCH ITT_ARCH_IA32E #elif defined _M_IA64 || defined __ia64__ diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index 12fea67e000e..867cb3024921 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -121,7 +121,7 @@ KMP_PREFIX_UNDERSCORE(\proc): # endif // KMP_OS_DARWIN #endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 -#if KMP_ARCH_AARCH64 
|| KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM +#if KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC # if KMP_OS_DARWIN # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index c7fe0642cea6..70a57c3c8f8a 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2736,145 +2736,6 @@ finish: // Clean up and exit. #endif // USE_LOAD_BALANCE -#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || KMP_ARCH_AARCH64 || \ - KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \ - KMP_ARCH_AARCH64_32) - -// Because WebAssembly will use `call_indirect` to invoke the microtask and -// WebAssembly indirect calls check that the called signature is a precise -// match, we need to cast each microtask function pointer back from `void *` to -// its original type. 
-typedef void (*microtask_t0)(int *, int *); -typedef void (*microtask_t1)(int *, int *, void *); -typedef void (*microtask_t2)(int *, int *, void *, void *); -typedef void (*microtask_t3)(int *, int *, void *, void *, void *); -typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *); -typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *, - void *); -typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *, - void *, void *); -typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *, - void *, void *, void *); -typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *); -typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *); -typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *); -typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *); -typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *, void *); -typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *, void *, void *); -typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *); -typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *); - -// we really only need the case with 1 argument, because CLANG always build -// a struct of pointers to shared variables referenced in the outlined function -int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, - void *p_argv[] -#if OMPT_SUPPORT - , - void 
**exit_frame_ptr -#endif -) { -#if OMPT_SUPPORT - *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); -#endif - - switch (argc) { - default: - fprintf(stderr, "Too many args to microtask: %d!\n", argc); - fflush(stderr); - exit(-1); - case 0: - (*(microtask_t0)pkfn)(&gtid, &tid); - break; - case 1: - (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]); - break; - case 2: - (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); - break; - case 3: - (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); - break; - case 4: - (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3]); - break; - case 5: - (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4]); - break; - case 6: - (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5]); - break; - case 7: - (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6]); - break; - case 8: - (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7]); - break; - case 9: - (*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], - p_argv[8]); - break; - case 10: - (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9]); - break; - case 11: - (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9], p_argv[10]); - break; - case 12: - (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11]); - break; - case 13: - (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9],
p_argv[10], - p_argv[11], p_argv[12]); - break; - case 14: - (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11], p_argv[12], p_argv[13]); - break; - case 15: - (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], - p_argv[3], p_argv[4], p_argv[5], p_argv[6], - p_argv[7], p_argv[8], p_argv[9], p_argv[10], - p_argv[11], p_argv[12], p_argv[13], p_argv[14]); - break; - } - - return 1; -} - -#endif - #if KMP_OS_LINUX // Functions for hidden helper task namespace { diff --git a/openmp/runtime/src/z_Windows_NT-586_util.cpp b/openmp/runtime/src/z_Windows_NT-586_util.cpp index 37759feafd45..4ed309365b56 100644 --- a/openmp/runtime/src/z_Windows_NT-586_util.cpp +++ b/openmp/runtime/src/z_Windows_NT-586_util.cpp @@ -12,7 +12,8 @@ #include "kmp.h" -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM || \ + KMP_ARCH_ARM64EC) /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to use compare_and_store for these routines */ @@ -189,4 +190,5 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, } #endif -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM \ + || KMP_ARCH_ARM64EC */ diff --git a/openmp/runtime/test/misc_bugs/many-microtask-args.c b/openmp/runtime/test/misc_bugs/many-microtask-args.c index d644515d9a4f..48ef2a0222d1 100644 --- a/openmp/runtime/test/misc_bugs/many-microtask-args.c +++ b/openmp/runtime/test/misc_bugs/many-microtask-args.c @@ -1,6 +1,10 @@ // RUN: %libomp-compile-and-run #include +// This test fails on Arm64EC as __kmp_invoke_microtask doesn't support more +// than 16 arguments on that target. +// XFAIL: arm64ec + int main() {